From 9174e46a86fe56f9557bdfa97804323c0840681a Mon Sep 17 00:00:00 2001 From: Alexander Bigerl Date: Fri, 22 Jan 2021 15:21:06 +0100 Subject: [PATCH] merged partial_grammer_parsing --- .clang-format | 68 + .dockerignore | 162 ++ .gitignore | 121 +- .travis.yml | 36 - CMakeLists.txt | 46 +- Dockerfile | 69 + README.md | 96 +- conanfile.py | 14 +- include/Dice/RDF/ParseTerm.hpp | 43 + include/Dice/RDF/Term.hpp | 286 +++ include/Dice/RDF/Triple.hpp | 53 + include/Dice/RDF/internal/AbstractTriple.hpp | 65 + include/Dice/SPARQL/TriplePattern.hpp | 79 + include/Dice/SPARQL/Variable.hpp | 87 + include/Dice/rdf-parser/TurtleFileParser.hpp | 151 ++ .../Dice/rdf-parser/TurtleStringParser.hpp | 64 + .../exception/RDFParsingExecption.hpp | 13 + .../internal/Turtle/Actions/Actions.hpp | 127 ++ .../internal/Turtle/Actions/BasicActions.hpp | 359 +++ .../internal/Turtle/Configurations.hpp | 9 + .../rdf-parser/internal/Turtle/Grammar.hpp | 412 ++++ .../Turtle/Parsers/AbstractParser.hpp | 108 + .../Turtle/Parsers/BaseStringParser.hpp | 90 + .../internal/Turtle/States/BasicState.hpp | 144 ++ .../Turtle/States/ConcurrentState.hpp | 101 + .../Turtle/States/SequentialState.hpp | 49 + .../internal/Turtle/States/State.hpp | 200 ++ .../internal/exception/InternalError.hpp | 14 + .../rdf-parser/internal/util/ScopedThread.hpp | 26 + .../Parser/Turtle/Actions/Actions.hpp | 146 -- .../Parser/Turtle/Actions/BasicActions.hpp | 367 --- .../Dice/rdf_parser/Parser/Turtle/Grammer.hpp | 519 ----- .../Turtle/Parsers/ConcurrentStreamParser.hpp | 139 -- .../Parser/Turtle/Parsers/FileParser.hpp | 105 - .../Parser/Turtle/Parsers/StreamParser.hpp | 101 - .../Parser/Turtle/Parsers/StringParser.hpp | 130 -- .../Parser/Turtle/Parsers/TriplesParser.hpp | 109 - .../Parser/Turtle/States/BasicState.hpp | 171 -- .../Parser/Turtle/States/ConcurrentState.hpp | 101 - .../rdf_parser/Parser/Turtle/States/State.hpp | 210 -- include/Dice/rdf_parser/RDF/Term.hpp | 373 ---- include/Dice/rdf_parser/RDF/Triple.hpp | 47 - 
.../Sparql/TriplePatternElement.hpp | 39 - .../Dice/rdf_parser/Sparql/TripleVariable.hpp | 52 - .../Dice/rdf_parser/util/scoped_thread.hpp | 28 - .../internal/TriplesBlockStringParser.hpp | 39 + tests/CMakeLists.txt | 61 +- tests/SparqlTriplesBlockParsingTests.cpp | 91 + tests/TermParserTests.cpp | 27 - tests/TermTests.cpp | 157 +- tests/Tests.cpp | 22 +- tests/TurtleOfficalExamplesTest.cpp | 1051 ++++----- tests/TurtleOfficialEvaluationTests.cpp | 1974 ++++++++--------- .../TurtleOfficialNegativeEvaluationTests.cpp | 31 +- tests/TurtleOfficialNegativeTests.cpp | 865 ++++---- tests/TurtleOfficialPositiveTests.cpp | 1179 +++++----- tests/TurtleParserConcurrentTests.cpp | 26 - tests/TurtleParserFilesTests.cpp | 29 +- tests/TurtlePartialGrammerTests.cpp | 77 - 59 files changed, 5770 insertions(+), 5588 deletions(-) create mode 100644 .clang-format create mode 100644 .dockerignore delete mode 100644 .travis.yml create mode 100644 Dockerfile create mode 100644 include/Dice/RDF/ParseTerm.hpp create mode 100644 include/Dice/RDF/Term.hpp create mode 100644 include/Dice/RDF/Triple.hpp create mode 100644 include/Dice/RDF/internal/AbstractTriple.hpp create mode 100644 include/Dice/SPARQL/TriplePattern.hpp create mode 100644 include/Dice/SPARQL/Variable.hpp create mode 100644 include/Dice/rdf-parser/TurtleFileParser.hpp create mode 100644 include/Dice/rdf-parser/TurtleStringParser.hpp create mode 100644 include/Dice/rdf-parser/exception/RDFParsingExecption.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/Actions/Actions.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/Actions/BasicActions.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/Configurations.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/Grammar.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/Parsers/AbstractParser.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/Parsers/BaseStringParser.hpp create mode 100644 
include/Dice/rdf-parser/internal/Turtle/States/BasicState.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/States/ConcurrentState.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/States/SequentialState.hpp create mode 100644 include/Dice/rdf-parser/internal/Turtle/States/State.hpp create mode 100644 include/Dice/rdf-parser/internal/exception/InternalError.hpp create mode 100644 include/Dice/rdf-parser/internal/util/ScopedThread.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Actions/BasicActions.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Grammer.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Parsers/ConcurrentStreamParser.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Parsers/FileParser.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Parsers/StreamParser.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Parsers/StringParser.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/Parsers/TriplesParser.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/States/BasicState.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/States/ConcurrentState.hpp delete mode 100644 include/Dice/rdf_parser/Parser/Turtle/States/State.hpp delete mode 100644 include/Dice/rdf_parser/RDF/Term.hpp delete mode 100644 include/Dice/rdf_parser/RDF/Triple.hpp delete mode 100644 include/Dice/rdf_parser/Sparql/TriplePatternElement.hpp delete mode 100644 include/Dice/rdf_parser/Sparql/TripleVariable.hpp delete mode 100644 include/Dice/rdf_parser/util/scoped_thread.hpp create mode 100644 include/Dice/sparql-parser/internal/TriplesBlockStringParser.hpp create mode 100644 tests/SparqlTriplesBlockParsingTests.cpp delete mode 100644 tests/TermParserTests.cpp delete mode 100644 tests/TurtleParserConcurrentTests.cpp delete mode 100644 tests/TurtlePartialGrammerTests.cpp diff --git a/.clang-format 
b/.clang-format new file mode 100644 index 0000000..4ae554e --- /dev/null +++ b/.clang-format @@ -0,0 +1,68 @@ +# Generated from CLion C/C++ Code Style settings +BasedOnStyle: LLVM +Language: Cpp +Standard: c++20 +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignOperands: true +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Always +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Always +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterReturnType: None +AlwaysBreakTemplateDeclarations: Yes +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: true +BreakBeforeBinaryOperators: None +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +ColumnLimit: 0 +CompactNamespaces: false +ContinuationIndentWidth: 8 +IndentCaseLabels: true +IndentPPDirectives: None +IndentWidth: 4 +KeepEmptyLinesAtTheStartOfBlocks: true +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: All +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PointerAlignment: Right +ReflowComments: false +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 0 +SpacesInAngles: false 
+SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +TabWidth: 4 +UseTab: ForContinuationAndIndentation diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1105130 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,162 @@ + +# Created by https://www.toptal.com/developers/gitignore/api/c++,conan,jetbrains+all,cmake +# Edit at https://www.toptal.com/developers/gitignore?templates=c++,conan,jetbrains+all,cmake + +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Linker files +*.ilk + +# Debugger Files +*.pdb + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### CMake ### +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps +CMakeUserPresets.json + +### CMake Patch ### +# External projects +*-prefix/ + +### Conan ### +# Conan build information +conan.lock +conanbuildinfo.* +conaninfo.txt +graph_info.json + +### JetBrains+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or 
Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### JetBrains+all Patch ### +# Ignores the whole .idea folder and all .iml files +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 + +*.iml +modules.xml +.idea/misc.xml +*.ipr + +# Sonarlint plugin +.idea/sonarlint + +# End of https://www.toptal.com/developers/gitignore/api/c++,conan,jetbrains+all,cmake + +# docu folder +/docu/ diff --git a/.gitignore b/.gitignore index 51d2cd2..1105130 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,8 @@ -#c++ : + +# Created by https://www.toptal.com/developers/gitignore/api/c++,conan,jetbrains+all,cmake +# Edit at https://www.toptal.com/developers/gitignore?templates=c++,conan,jetbrains+all,cmake + +### C++ ### # Prerequisites *.d @@ -12,6 +16,12 @@ *.gch *.pch +# Linker files +*.ilk + +# Debugger Files +*.pdb + # Compiled Dynamic libraries *.so *.dylib @@ -32,7 +42,7 @@ *.out *.app -#cmake: +### CMake ### CMakeLists.txt.user CMakeCache.txt CMakeFiles @@ -44,6 +54,109 @@ install_manifest.txt compile_commands.json 
CTestTestfile.cmake _deps +CMakeUserPresets.json + +### CMake Patch ### +# External projects +*-prefix/ + +### Conan ### +# Conan build information +conan.lock +conanbuildinfo.* +conaninfo.txt +graph_info.json + +### JetBrains+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### JetBrains+all Patch ### +# Ignores the whole .idea folder and all .iml files +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 + +*.iml +modules.xml +.idea/misc.xml +*.ipr + +# Sonarlint plugin +.idea/sonarlint + +# End of https://www.toptal.com/developers/gitignore/api/c++,conan,jetbrains+all,cmake -#INTELLIJ -.idea \ No newline at end of file +# docu folder +/docu/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a3e1f33..0000000 --- a/.travis.yml +++ /dev/null @@ -1,36 +0,0 @@ -dist: xenial -language: cpp - -matrix: - include: - - compiler: gcc - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-9 - - env: - - COMPILER=g++-9 - - CXX=g++-9 - - CC=gcc-9 - - - cache: - directories: - - $HOME/.conan/data - - before_script: - - sudo apt install python3-pip - - pip install conan --user - - conan remote add conan-public "https://api.bintray.com/conan/conan/public-conan" - - conan remote add taocpp "https://api.bintray.com/conan/taocpp/public-conan" - - conan remote add bincrafters https://api.bintray.com/conan/bincrafters/public-conan - script: - - 
mkdir build - - cd build - - conan install .. --build=missing --settings compiler.libcxx="libstdc++11" - - cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -Drdf_parser_BUILD_TESTS=ON .. - - make -j - - ./tests/bin/tests \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 784bd4f..8aef51d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ -cmake_minimum_required(VERSION 3.12) -project(rdf_parser VERSION 0.11.0) -set(CMAKE_CXX_STANDARD 17) +cmake_minimum_required(VERSION 3.14) +project(rdf-parser VERSION 0.12.0) +set(CMAKE_CXX_STANDARD 20) include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) @@ -8,45 +8,45 @@ conan_basic_setup() # installation directories -set(rdf_parser_INSTALL_INCLUDE_DIR "include" CACHE STRING "The installation include directory") -set(rdf_parser_INSTALL_CMAKE_DIR "share/rdf_parser/cmake" CACHE STRING "The installation cmake directory") +set(rdf-parser_INSTALL_INCLUDE_DIR "include" CACHE STRING "The installation include directory") +set(rdf-parser_INSTALL_CMAKE_DIR "share/rdf-parser/cmake" CACHE STRING "The installation cmake directory") # define a header-only library -add_library(rdf_parser INTERFACE) -add_library(Dice::rdf_parser ALIAS rdf_parser ) +add_library(rdf-parser INTERFACE) +add_library(Dice::rdf-parser ALIAS rdf-parser) -target_include_directories(rdf_parser INTERFACE +target_include_directories(rdf-parser INTERFACE $ - $ + $ ) -target_link_libraries(rdf_parser INTERFACE ${CONAN_LIBS}) +target_link_libraries(rdf-parser INTERFACE ${CONAN_LIBS}) # testing -option(rdf_parser_BUILD_TESTS "Build rdf_parser tests." ON) -if(rdf_parser_BUILD_TESTS) +option(RDF_PARSER_BUILD_TESTS "Build rdf-parser tests." 
OFF) +if (RDF_PARSER_BUILD_TESTS) enable_testing() add_subdirectory(tests) -endif() +endif () -# require C++17 -target_compile_features(rdf_parser INTERFACE cxx_std_17) +# require C++20 +target_compile_features(rdf-parser INTERFACE cxx_std_20) # Make package findable -configure_file(cmake/dummy-config.cmake.in rdf_parser-config.cmake @ONLY) +configure_file(cmake/dummy-config.cmake.in rdf-parser-config.cmake @ONLY) # Enable version checks in find_package include(CMakePackageConfigHelpers) -write_basic_package_version_file(rdf_parser-config-version.cmake COMPATIBILITY SameMajorVersion) +write_basic_package_version_file(rdf-parser-config-version.cmake COMPATIBILITY SameMajorVersion) # install and export target -install(TARGETS rdf_parser EXPORT rdf_parser-targets) +install(TARGETS rdf-parser EXPORT rdf-parser-targets) -install(EXPORT rdf_parser-targets - FILE rdf_parser-config.cmake +install(EXPORT rdf-parser-targets + FILE rdf-parser-config.cmake NAMESPACE Dice:: - DESTINATION ${rdf_parser_INSTALL_CMAKE_DIR} + DESTINATION ${rdf-parser_INSTALL_CMAKE_DIR} ) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/rdf_parser-config-version.cmake DESTINATION ${rdf_parser_INSTALL_CMAKE_DIR}) -install(DIRECTORY include/ DESTINATION ${rdf_parser_INSTALL_INCLUDE_DIR}) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/rdf-parser-config-version.cmake DESTINATION ${rdf-parser_INSTALL_CMAKE_DIR}) +install(DIRECTORY include/ DESTINATION ${rdf-parser_INSTALL_INCLUDE_DIR}) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ae1d230 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,69 @@ +FROM ubuntu:focal AS builder +ARG DEBIAN_FRONTEND=noninteractive + +RUN apt-get -qq update +RUN apt-get -qq install -y make cmake git libstdc++-10-dev g++-10 clang-11 +RUN apt-get -qq install -y python3-pip python3-setuptools python3-wheel +# uuid-dev pkg-config openjdk-11-jdk + +# setup conan +RUN pip3 install conan +RUN conan user && \ + conan profile new --detect default && \ + conan profile update 
settings.compiler.libcxx=libstdc++11 default && \ + conan profile update settings.compiler.version=10 default && \ + conan profile new --detect clang11 && \ + conan profile update settings.compiler=clang clang11 &&\ + conan profile update settings.compiler.version=11 clang11 && \ + conan profile update settings.compiler.libcxx=libstdc++11 clang11 + +RUN conan remote add dice "https://api.bintray.com/conan/dice-group/tentris" + +WORKDIR /rdf-parser +COPY CMakeLists.txt CMakeLists.txt +COPY tests tests +COPY include include +COPY cmake cmake +COPY conanfile.py conanfile.py + +#gcc + +WORKDIR /rdf-parser/build_gcc + +ENV CXX="g++-10" +ENV CC="gcc-10" + +RUN conan install .. --build=missing -o rdf-parser:with_tests=True + +## change working directory +WORKDIR /rdf-parser/build_gcc + +# +# run cmake +RUN CC=gcc-10 CXX=g++-10 cmake -DCMAKE_BUILD_TYPE=Release -DRDF_PARSER_BUILD_TESTS=ON .. +# build +RUN make -j $(nproc) + +#for clang +## change working directory +WORKDIR /rdf-parser/build_clang + +ENV CXX="clang++-11" +ENV CC="clang-11" + +RUN conan install .. --build=missing --profile clang11 -o rdf-parser:with_tests=True +# run cmake +RUN CC=clang-11 CXX=clang++-11 cmake -DCMAKE_BUILD_TYPE=Release -DRDF_PARSER_BUILD_TESTS=ON .. +# build +RUN make -j $(nproc) + +WORKDIR /rdf-parser/build_clang +RUN ./bin/tests + +WORKDIR /rdf-parser/build_gcc +RUN ./bin/tests + +WORKDIR /rdf-parser/ + +#RUN conan create . "rdf-parser/test1@dice-group/stable" --build missing +#RUN conan create . "rdf-parser/test2@dice-group/stable" --build missing --profile clang11 \ No newline at end of file diff --git a/README.md b/README.md index 3eacd0a..46de946 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # Tentris RDF Parser -This is the RDF parser used by [Tentris](https://github.com/dice-group/tentris). It uses [PEGTL](https://github.com/taocpp/PEGTL), Parsing Expression Grammar Library, to parse RDF files (currently supported: ntriple, turtle). 
+This is the RDF parser used by [Tentris](https://github.com/dice-group/tentris). It uses [PEGTL](https://github.com/taocpp/PEGTL), Parsing Expression Grammar Library, to parse RDF files (currently supported: ntriple, turtle). + It also supports parsing the triplesBlock part of Sparql queries. It has buffered streaming support so that you can load files that are too big to fit in main memory. -Currently it is not fully supporting comments.it fails to parse comments in some senarios . +Currently it is not fully supporting comments. It fails to parse comments in some scenarios. ## how to build ### prerequisites @@ -28,7 +29,8 @@ make -j ### conan -**TODO: update this as soon as it is published** + +recipe : rdf-parser/0.12.0@dice-group/stable ``` conan remote add dice "https://api.bintray.com/conan/dice-group/tentris" @@ -39,66 +41,94 @@ conan remote add dice "https://api.bintray.com/conan/dice-group/tentris" ### Introduction A header-only library designed to allow efficient and controlled parsing for RDF languages. -Currently it supports ntriples and turtle languages . +Currently it supports ntriples and turtle languages, as well as triplesBlock part of sparql queries. It uses tao's PEGTL library for parsing. -### Usage +### Usage + +The library is easy to use, for parsing turtle or ntriples include the header for the required parser. +for example, to use the File parser, include `` -The library is easy to use, for parsing turtle or ntriples include the header `` and use the class `TurtleParser<>` (which is a wrapper to use any parser type) for parsing. -`TurtleParser` accept one template parameter which determine the parser type (more information about that on the next section), and it's constructor accept one string parameter which is either the string which is the document to be parsed in case of `StringParser` or the filename of the file that contains the document to be parsed. 
+The RDF parsers' constructors accept one string parameter, which is either the document to be parsed (in the case of `TurtleStringParser`) or the name of the file that contains the document to be parsed (in the case of `TurtleFileParser`). Example : ```c++ -TurtleParser turtleParser("datasets/g.nt"); + TurtleFileParser parser("datasets/g.nt"); ``` where `"datasets/g.nt"` is the file name of the file we want to parse. Now an iterator can be created: ```c++ -auto iterator=turtleParser.begin(); +auto iterator=parser.begin(); ``` We can iterate over the triples and used the triples: ```c++ -while(it) -{ - Triple triple= *it; - it++; +for(const Triple &triple : parser) { + // do something with triple + std::cout << triple.subject().getIdentifier() << " " << triple.predicate().getIdentifier() << " " << triple.object().getIdentifier() << std::endl; } ``` ### Parsers types There are four types of parsers which can be used: -- `StringParser`: It is used for parsing Strings immediately. It accepts one parameter which is the string of the document to be parsed. -- `FileParser`: It is used for parsing a whole document file at a time. The parsed triples cannot be accessed before the parsing of the whole document is done. It is faster than the `StreamParser` (below) but require much more memory and thus may run out of memory for large inputs. -- `StreamParser`: It is used for parsing streams or large files. It can process very big files with low memory usage by parsing chunk by chunk. However, triples are not accessible until the parsing is done. -- `ConcurrentStreamParser`: It can be used the same way as the `StreamParser` but uses a separated thread for parsing and uses Intel's TBB `concurrent_queue` -for storing the parsed triples. Therefore, the already parsed triples can be accessed during the parsing process. It is the default template parameter for `TurtleParser`. 
-And it is the recommended parser to use for large files or streams and for simultaneous parsing and accessing to the already parsed triples. +- `TurtleStringParser`: It can be used to parse RDF strings immediately. It accepts one parameter, which is the string of the document to be parsed. +- `TurtleFileParser`: It can be used to parse a whole document file that contains RDF data. It can process very big files with low memory usage by parsing chunk by chunk. It also uses a separate thread for parsing and writes the results to a concurrent queue. +Therefore, the already parsed triples can be accessed during the parsing process. It accepts one parameter which is the name of the file. -### Example -Here we create a full example for parsing a turtle file. It is by default using the `ConcurrentStreamParser`. We assuming there is an turtle file at `datasets/dataset1.ttl`. The file is parsed and the triples are printed to `std::out`. +- `TriplesBlockStringParser`: It is used for parsing SPARQL triplesBlock strings immediately. It accepts one parameter, which is the string of the document to be parsed, and another +optional parameter, which is either a std::unordered_map or a robin_hood::unordered_map (which is more optimized) that contains the prefixes. + +### Examples -```c++ -#include +1-Here we create a full example for parsing a Turtle file. Here we use the `TurtleFileParser`. We assume there is a Turtle file at `datasets/dataset1.ttl`. The file is parsed and the triples are printed to `std::cout`. 
+Dice::rdf_parser::Turtle::parsers + ```c++ -namespace { - using namespace rdf_parser::Turtle; - using namespace rdf_parser::store::rdf; -} +#include int main() { - TurtleParser<> Parser("datasets/dataset1.ttl"); +Dice::rdf_parser::Turtle::parsers::TurtleFileParser parser("datasets/dataset1.ttl"); auto it=Parser.begin(); - //or we can use: TurtleParser<>::Iterator it=Parser.begin(); while (it){ - Triple triple= *it; - //or const Triple &integerNumber = *iterator; to get a const ref - std::cout<<"subject :"< +#include + +int main() +{ +//create a map of prefixes +robin_hood::unordered_map prefixes; +prefixes.emplace("wde","http://www.wikidata.org/entity/"); +prefixes.emplace("wdt","http://www.wikidata.org/prop/direct/"); + +// create the parser with 2 parameters: the query and the prefixes map +Dice::sparql_parser::internal::TriplesBlockStringParser parser("?var1 _:b0 . _:b0 wde:Q202479 ; ?var2 .", prefixes) ; + +//get an iterator +auto it= parser.begin(); + +while (it) +{ + TriplePatternElement triplePatternElement=*it; + Dice::sparql::VarOrTerm subject=TriplePatternElement.subject(); + Dice::sparql::VarOrTerm subject=TriplePatternElement.predicate(); + Dice::sparql::VarOrTerm subject=TriplePatternElement.object(); + it++; +} +} +``` diff --git a/conanfile.py b/conanfile.py index 4742457..f4c6b60 100644 --- a/conanfile.py +++ b/conanfile.py @@ -1,26 +1,34 @@ from conans import ConanFile, CMake +from conans.tools import load +import re, os class RDFParser(ConanFile): name = "rdf-parser" - version = "0.11" author = "DICE Group " description = "RDF parser used by [Tentris](https://github.com/dice-group/tentris). It uses [PEGTL](https://github.com/taocpp/PEGTL), Parsing Expression Grammar Library, to parse RDF files (currently supported: ntriple, turtle). 
" homepage = "https://github.com/dice-group/rdf-parser" url = homepage license = "AGPL" + options = {'with_tests': [False, True]} + default_options = {'with_tests': False} topics = ("dice-group", "RDF", "parser", "semantic web", "turtle", "ntriple") settings = "build_type", "compiler", "os", "arch" - requires = "boost/1.71.0@conan/stable", "pegtl/2.8.1@taocpp/stable", "gtest/1.8.1@bincrafters/stable", "fmt/6.0.0@bincrafters/stable", "abseil/20181200@bincrafters/stable" + requires = "boost/1.75.0", "taocpp-pegtl/3.1.0", "fmt/7.1.2", "dice-hash/0.1.0@dice-group/stable" , "robin-hood-hashing/3.9.1" generators = "cmake", "cmake_find_package", "cmake_paths" exports = "LICENSE" exports_sources = "include/*", "CMakeLists.txt", "cmake/dummy-config.cmake.in" no_copy_source = True + def set_version(self): + if not hasattr(self, 'version') or self.version is None: + cmake_file = load(os.path.join(self.recipe_folder, "CMakeLists.txt")) + self.version = re.search("project\(rdf-parser VERSION (.*)\)", cmake_file).group(1) def package(self): cmake = CMake(self) - cmake.definitions["rdf_parser_BUILD_TESTS"] = "OFF" + if self.options.with_tests: + cmake.definitions["RDF_PARSER_BUILD_TESTS"] = "ON" cmake.configure() cmake.install() diff --git a/include/Dice/RDF/ParseTerm.hpp b/include/Dice/RDF/ParseTerm.hpp new file mode 100644 index 0000000..83e98aa --- /dev/null +++ b/include/Dice/RDF/ParseTerm.hpp @@ -0,0 +1,43 @@ +#ifndef RDF_PARSER_MAKE_TERM_HPP +#define RDF_PARSER_MAKE_TERM_HPP + +#include "Dice/RDF/Term.hpp" +#include "Dice/rdf-parser/internal/Turtle/Actions/BasicActions.hpp" +#include "Dice/rdf-parser/internal/Turtle/Grammar.hpp" +#include "Dice/rdf-parser/internal/Turtle/States/BasicState.hpp" + +namespace Dice::rdf { + /** + * Parses a RDF Term string. 
+ * @param identifier rdf term string + * @throws std::logic_error is thrown if identifier is not parsable + * @return RDF Term + */ + inline Term parse_term(const std::string &identifier) { + try { + using namespace tao::pegtl; + string_input input(identifier, "the text"); + Dice::rdf_parser::internal::Turtle::States::BasicState state; + parse(input, state); + return std::move(state.getElement()); + } catch (const std::exception &e) { + throw std::logic_error{fmt::format("{} is not a valid term.", identifier)}; + } + } + + /** + * Checks whether a Term string is parsable. + * @param identifier identifier rdf term string + * @return if identifier is parsable. + */ + inline bool term_parsable(const std::string &identifier) { + try { + parse_term(identifier); + return true; + } catch (const std::logic_error &e) { + return false; + } + } + +}; // namespace Dice::rdf +#endif//RDF_PARSER_MAKE_TERM_HPP diff --git a/include/Dice/RDF/Term.hpp b/include/Dice/RDF/Term.hpp new file mode 100644 index 0000000..a32a0d6 --- /dev/null +++ b/include/Dice/RDF/Term.hpp @@ -0,0 +1,286 @@ +#ifndef DICE_RDF_TERM_HPP +#define DICE_RDF_TERM_HPP + + +#include +#include +#include +#include +#include + +#include +#include + + +namespace Dice::rdf { + + /** + * This is a portable string view. It is not bound to a single string but applicable to any copy of a string. + */ + struct unbound_string_view { + std::ptrdiff_t start = 0; + std::size_t count = 0; + + /** + * Get a string_view for the given string. + * @param str string to be viewed + * @return string_view on the string + */ + [[nodiscard]] std::string_view string_view(const std::string &str) const { + return {str.c_str() + start, count}; + } + }; + + class Literal; + + class BNode; + + class URIRef; + + /** + * An RDF Term. + */ + class Term { + public: + /** + * Type of a Term. 
+         */
+        enum NodeType {
+            None = 0,
+            URIRef_,
+            BNode_,
+            Literal_
+        };
+
+    protected:
+        /** Complete textual form of the term; the views below are resolved against it. */
+        std::string identifier_{};
+        NodeType node_type_{};
+        /** Location of the term's value inside identifier_. */
+        unbound_string_view value_{};
+        // TODO: we may use std::variant for _lang and _data_type
+        /** Location of the language tag inside identifier_; only Literal sets it. */
+        unbound_string_view lang_{};
+        /** Location of the datatype IRI inside identifier_; only Literal sets it. */
+        unbound_string_view data_type_{};
+
+        /**
+         * Constructor for subtypes. URIRef and BNode call it; Literal fills the fields itself.
+         * @param identifier complete textual form of the term
+         * @param node_type kind of node that is being constructed
+         */
+        Term(std::string identifier, NodeType node_type) : identifier_(std::move(identifier)),
+                                                            node_type_(node_type) {}
+
+    public:
+        Term() = default;
+
+        Term(Term &) = default;
+
+        Term(const Term &) = default;
+
+        Term(Term &&) = default;
+
+        Term &operator=(const Term &) = default;
+
+        Term &operator=(Term &&) = default;
+
+
+        /** @return the complete textual form of the term. */
+        [[nodiscard]] const std::string &getIdentifier() const {
+            return identifier_;
+        }
+
+        /** @return the node type stored at construction (None for a default-constructed Term). */
+        [[nodiscard]] inline const NodeType &type() const {
+            return node_type_;
+        }
+
+        [[nodiscard]] inline bool isLiteral() const {
+            return node_type_ == NodeType::Literal_;
+        }
+
+        [[nodiscard]] inline bool isBNode() const {
+            return node_type_ == NodeType::BNode_;
+        }
+
+        [[nodiscard]] inline bool isURIRef() const {
+            return node_type_ == NodeType::URIRef_;
+        }
+
+        // The casts below are unchecked static_casts; test isLiteral()/isBNode()/isURIRef() first.
+        [[nodiscard]] inline Literal &castLiteral();
+
+        [[nodiscard]] inline BNode &castBNode();
+
+        [[nodiscard]] inline URIRef &castURIRef();
+
+        [[nodiscard]] inline const Literal &castLiteral() const;
+
+        [[nodiscard]] inline const BNode &castBNode() const;
+
+        [[nodiscard]] inline const URIRef &castURIRef() const;
+
+        /** @return view of the value part of identifier_; valid only while this Term is alive and unmodified. */
+        [[nodiscard]] inline std::string_view value() const {
+            return value_.string_view(identifier_);
+        }
+
+        // All comparisons look at identifier_ only.
+        inline bool operator==(const Term &rhs) const {
+            return identifier_ == rhs.identifier_;
+        }
+
+        inline bool operator!=(const Term &rhs) const {
+            return identifier_ != rhs.identifier_;
+        }
+
+        inline bool operator<(const Term &rhs) const {
+            return identifier_ < rhs.identifier_;
+        }
+
+        inline bool operator>(const Term &rhs) const {
+            return identifier_ > rhs.identifier_;
+        }
+
+        // Pointer overloads: the operands are dereferenced without a null check,
+        // so callers must pass non-null pointers.
+        friend bool operator==(const Term &lhs, const std::unique_ptr<Term> &rhs) {
+            return lhs == *rhs;
+        }
+
+        friend bool operator==(const std::unique_ptr<Term> &lhs, const Term &rhs) {
+            return *lhs == rhs;
+        }
+
+        friend bool operator==(const std::unique_ptr<Term> &lhs, const std::unique_ptr<Term> &rhs) {
+            return *lhs == *rhs;
+        }
+
+        friend bool operator==(const Term *lhs, const std::unique_ptr<Term> &rhs) {
+            return *lhs == *rhs;
+        }
+
+        friend bool operator==(const std::unique_ptr<Term> &lhs, const Term *rhs) {
+            return *lhs == *rhs;
+        }
+
+        /** @return hash of identifier_, consistent with operator==. */
+        [[nodiscard]] std::size_t hash() const {
+            return ::Dice::hash::dice_hash(this->identifier_);
+        }
+    };
+
+    /**
+     * IRI term. The identifier is "<uri>"; value() yields the uri without the angle brackets.
+     */
+    class URIRef : public Term {
+
+    public:
+        explicit URIRef(const std::string &uri) : Term(fmt::format("<{}>", uri), NodeType::URIRef_) {
+            // skip the leading '<'
+            this->value_ = {1, uri.size()};
+        }
+
+        [[nodiscard]] inline std::string_view uri() const {
+            return value();
+        }
+    };
+
+    /**
+     * Blank node term. The identifier is "_:label"; value() yields the label.
+     */
+    class BNode : public Term {
+    public:
+        explicit BNode(const std::string &bnode_label) : Term(fmt::format("_:{}", bnode_label), NodeType::BNode_) {
+            // skip the leading "_:"
+            this->value_ = {2, bnode_label.size()};
+        }
+
+        [[nodiscard]] inline std::string_view bnodeLabel() const {
+            return value();
+        }
+    };
+
+    /**
+     * Literal term. Depending on the arguments the identifier is
+     *  - "value"@lang        if a language tag is given,
+     *  - "value"^^<type>     if a datatype other than xsd:string is given,
+     *  - "value"             otherwise.
+     * A language tag takes precedence over a datatype. The value is inserted verbatim (no escaping).
+     */
+    class Literal : public Term {
+    public:
+        /**
+         * @param value lexical form of the literal
+         * @param lang optional language tag (without '@')
+         * @param type optional datatype IRI (without angle brackets)
+         */
+        Literal(const std::string &value, const std::optional<std::string> &lang,
+                const std::optional<std::string> &type) {
+            node_type_ = NodeType::Literal_;
+            if (lang) {
+                this->identifier_ = fmt::format("\"{}\"@{}", value, lang.value());
+                // offset: opening quote + value + closing quote + '@'
+                this->lang_ = {static_cast<std::ptrdiff_t>(1 + value.size() + 1 + 1), lang->size()};
+            } else if (type and (type != "http://www.w3.org/2001/XMLSchema#string")) {
+                // TODO: handle default cases
+                this->identifier_ = fmt::format("\"{}\"^^<{}>", value, type.value());
+                // offset: opening quote + value + closing quote + "^^" + '<'
+                this->data_type_ = {static_cast<std::ptrdiff_t>(1 + value.size() + 1 + 2 + 1), type->size()};
+                // TODO: manage types
+            } else {
+                this->identifier_ = fmt::format("\"{}\"", value);
+            }
+            // skip the opening quote
+            this->value_ = {1, value.size()};
+        }
+
+        /** @return the datatype IRI, or an empty view if hasDataType() is false. */
+        [[nodiscard]] inline std::string_view dataType() const {
+            return data_type_.string_view(identifier_);
+        }
+
+        /** @return the language tag, or an empty view if hasLang() is false. */
+        [[nodiscard]] inline std::string_view lang() const {
+            return lang_.string_view(identifier_);
+        }
+
+        [[nodiscard]] inline bool hasDataType() const {
+            return data_type_.count != 0;
+        }
+
+        [[nodiscard]] inline bool hasLang() const {
+            return lang_.count != 0;
+        }
+    };
+
+    // Unchecked downcasts declared in Term. The subtypes add no data members.
+    Literal &Term::castLiteral() {
+        return static_cast<Literal &>(*this);
+    }
+
+    BNode &Term::castBNode() {
+        return static_cast<BNode &>(*this);
+    }
+
+    URIRef &Term::castURIRef() {
+        return static_cast<URIRef &>(*this);
+    }
+
+    const Literal &Term::castLiteral() const {
+        return static_cast<const Literal &>(*this);
+    }
+
+    const BNode &Term::castBNode() const {
+        return static_cast<const BNode &>(*this);
+    }
+
+    const URIRef &Term::castURIRef() const {
+        return static_cast<const URIRef &>(*this);
+    }
+}// namespace Dice::rdf
+
+namespace Dice::hash {
+    template<>
+    inline std::size_t dice_hash(const Dice::rdf::Term &v) noexcept {
+        return v.hash();
+    }
+}// namespace Dice::hash
+
+template<>
+struct std::hash<Dice::rdf::Term> {
+    size_t operator()(const Dice::rdf::Term &v) const {
+        return v.hash();
+    }
+};
+
+/** Formats a Term pointer as the term's identifier; a null pointer produces no output. */
+template<>
+struct fmt::formatter<const Dice::rdf::Term *> {
+    template<typename ParseContext>
+    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+    template<typename FormatContext>
+    auto format(const Dice::rdf::Term *p, FormatContext &ctx) {
+        // The identifier is data, not a format string: it may contain '{' or '}'.
+        if (p != nullptr)
+            return format_to(ctx.out(), "{}", p->getIdentifier());
+        else
+            return format_to(ctx.out(), "");
+    }
+};
+
+/** Formats a Term as its identifier. */
+template<>
+struct fmt::formatter<Dice::rdf::Term> {
+    template<typename ParseContext>
+    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+    template<typename FormatContext>
+    auto format(const Dice::rdf::Term &p, FormatContext &ctx) {
+        // The identifier is data, not a format string: it may contain '{' or '}'.
+        return format_to(ctx.out(), "{}", p.getIdentifier());
+    }
+};
+
+#endif//DICE_RDF_TERM_HPP
\ No newline at end of file
diff --git a/include/Dice/RDF/Triple.hpp b/include/Dice/RDF/Triple.hpp
new file mode 100644
index 0000000..6e7fc5f
--- /dev/null
+++ b/include/Dice/RDF/Triple.hpp
@@ -0,0 +1,53 @@
+#ifndef RDF_PARSER_TRIPLE_HPP
+#define RDF_PARSER_TRIPLE_HPP
+
+// NOTE(review): the header names of the next two #include lines were missing in the
+// reviewed text; they are reconstructed from the names this file uses - confirm.
+#include <Dice/hash/DiceHash.hpp>
+#include <fmt/format.h>
+
+#include "Dice/RDF/Term.hpp"
+#include "Dice/RDF/internal/AbstractTriple.hpp"
+
+namespace Dice::rdf {
+    /**
+     * An RDF triple: subject, predicate and object are all Terms.
+     */
+    class Triple : public internal::AbstractTriple<Term, Triple> {
+
+        using super_t = internal::AbstractTriple<Term, Triple>;
+
+    public:
+        Triple() = default;
+
+        Triple(Term subject, Term predicate, Term object) : super_t{std::move(subject), std::move(predicate), std::move(object)} {}
+
+        /** @return hash over the three entries. */
+        [[nodiscard]] size_t hash() const noexcept {
+            return ::Dice::hash::dice_hash(this->entries_);
+        }
+    };
+}// namespace Dice::rdf
+
+namespace Dice::hash {
+    template<>
+    inline std::size_t dice_hash(const Dice::rdf::Triple &v) noexcept {
+        return v.hash();
+    }
+}// namespace Dice::hash
+
+template<>
+struct std::hash<Dice::rdf::Triple> {
+    inline size_t operator()(const Dice::rdf::Triple &v) const {
+        return v.hash();
+    }
+};
+
+/** Formats a Triple as "(subject, predicate, object)". */
+template<>
+struct fmt::formatter<Dice::rdf::Triple> {
+    template<typename ParseContext>
+    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+    template<typename FormatContext>
+    auto format(const Dice::rdf::Triple &p, FormatContext &ctx) {
+        return format_to(ctx.out(), "({})", fmt::join(p, ", "));
+    }
+};
+
+#endif//RDF_PARSER_TRIPLE_HPP
diff --git a/include/Dice/RDF/internal/AbstractTriple.hpp b/include/Dice/RDF/internal/AbstractTriple.hpp
new file mode 100644
index 0000000..31a3101
--- /dev/null
+++ b/include/Dice/RDF/internal/AbstractTriple.hpp
@@ -0,0 +1,65 @@
+#ifndef RDF_PARSER_ABSTRACT_TRIPLE_HPP
+#define RDF_PARSER_ABSTRACT_TRIPLE_HPP
+
+#include <array>
+
+namespace Dice::rdf::internal {
+
+    /**
+     * Common base of three-element statements (subject, predicate, object).
+     * @tparam Element type of each of the three entries
+     * @tparam Derived the concrete triple type; used as operand type of operator==
+     */
+    template<typename Element, typename Derived>
+    class AbstractTriple {
+    protected:
+        typedef std::array<Element, 3> Triple_t;
+        /** Entries in the order subject, predicate, object. */
+        Triple_t entries_{};
+
+    public:
+        typedef typename Triple_t::iterator iterator;
+        typedef typename Triple_t::const_iterator const_iterator;
+        typedef typename Triple_t::reverse_iterator reverse_iterator;
+        typedef typename Triple_t::const_reverse_iterator const_reverse_iterator;
+
+        AbstractTriple() = default;
+
+        // The parameters are taken by value, so move them into place instead of copying again.
+        AbstractTriple(Element subject, Element predicate, Element object) : entries_{std::move(subject), std::move(predicate), std::move(object)} {}
+
+        [[nodiscard]] const Element &subject() const { return entries_[0]; }
+
+        [[nodiscard]] Element &subject() { return entries_[0]; }
+
+        [[nodiscard]] const Element &predicate() const { return entries_[1]; }
+
+        [[nodiscard]] Element &predicate() { return entries_[1]; }
+
+        [[nodiscard]] const Element &object() const { return entries_[2]; }
+
+        [[nodiscard]] Element &object() { return entries_[2]; }
+
+        /** Unchecked positional access: 0 = subject, 1 = predicate, 2 = object. */
+        Element &operator[](std::size_t pos) { return entries_[pos]; }
+
+        const Element &operator[](std::size_t pos) const { return entries_[pos]; }
+
+
+        void setSubject(Element subject) { entries_[0] = std::move(subject); }
+
+        void setPredicate(Element predicate) { entries_[1] = std::move(predicate); }
+
+        void setObject(Element object) { entries_[2] = std::move(object); }
+
+        // Iteration over the three entries in the order subject, predicate, object.
+        iterator begin() { return entries_.begin(); }
+        const_iterator begin() const { return entries_.begin(); }
+        iterator end() { return entries_.end(); }
+        const_iterator end() const { return entries_.end(); }
+        // rbegin() must return the reverse iterator types: entries_.rbegin() is not
+        // convertible to iterator / const_iterator.
+        reverse_iterator rbegin() { return entries_.rbegin(); }
+        const_reverse_iterator rbegin() const { return entries_.rbegin(); }
+        reverse_iterator rend() { return entries_.rend(); }
+        const_reverse_iterator rend() const { return entries_.rend(); }
+
+        /** Two triples are equal if subject, predicate and object are pairwise equal. */
+        friend bool operator==(const Derived &triple1, const Derived &triple2) {
+            return (triple1.subject() == triple2.subject() and
+                    triple1.predicate() == triple2.predicate() and
+                    triple1.object() == triple2.object());
+        }
+    };
+
+}// namespace Dice::rdf::internal
+
+#endif//RDF_PARSER_ABSTRACT_TRIPLE_HPP
diff --git a/include/Dice/SPARQL/TriplePattern.hpp b/include/Dice/SPARQL/TriplePattern.hpp
new file mode 100644
index 0000000..5228491
--- /dev/null
+++ b/include/Dice/SPARQL/TriplePattern.hpp
@@ -0,0 +1,79 @@
+#ifndef RDF_PARSER_SPARQL_TRIPLEPATTERNELEMENT_HPP
+#define RDF_PARSER_SPARQL_TRIPLEPATTERNELEMENT_HPP
+
+#include <variant>
+
+#include "Dice/RDF/Term.hpp"
+#include "Dice/RDF/internal/AbstractTriple.hpp"
+#include "Dice/SPARQL/Variable.hpp"
+
+namespace Dice::sparql {
+    // NOTE(review): the variant's alternatives were missing in the reviewed text; Variable and
+    // rdf::Term follow from the alias name and its users - confirm the order of the alternatives.
+    using VarOrTerm = std::variant<Variable, ::Dice::rdf::Term>;
+}// namespace Dice::sparql
+
+namespace Dice::hash {
+    /** Hashes a VarOrTerm by hashing whichever alternative it currently holds. */
+    template<>
+    inline std::size_t dice_hash(const Dice::sparql::VarOrTerm &v) noexcept {
+        return std::visit([](const auto &held) -> std::size_t { return dice_hash(held); }, v);
+    }
+}// namespace Dice::hash
+
+namespace Dice::sparql {
+    /**
+     * A SPARQL triple pattern: every position holds either a Variable or an RDF Term.
+     */
+    class TriplePattern : public ::Dice::rdf::internal::AbstractTriple<VarOrTerm, TriplePattern> {
+
+        using super_t = ::Dice::rdf::internal::AbstractTriple<VarOrTerm, TriplePattern>;
+
+    public:
+        TriplePattern() = default;
+
+        TriplePattern(VarOrTerm subject, VarOrTerm predicate, VarOrTerm object) : super_t{std::move(subject), std::move(predicate), std::move(object)} {}
+
+
+        /** @return hash over the three entries. */
+        [[nodiscard]] size_t hash() const noexcept {
+            return ::Dice::hash::dice_hash(this->entries_);
+        }
+    };
+}// namespace Dice::sparql
+
+namespace Dice::hash {
+    template<>
+    inline std::size_t dice_hash(const Dice::sparql::TriplePattern &v) noexcept {
+        return v.hash();
+    }
+}// namespace Dice::hash
+
+template<>
+struct std::hash<Dice::sparql::TriplePattern> {
+    inline size_t operator()(const Dice::sparql::TriplePattern &v) const {
+        return v.hash();
+    }
+};
+
+/** Formats a VarOrTerm with the formatter of the alternative it currently holds. */
+template<>
+struct fmt::formatter<Dice::sparql::VarOrTerm> {
+    template<typename ParseContext>
+    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+    template<typename FormatContext>
+    auto format(const Dice::sparql::VarOrTerm &p, FormatContext &ctx) {
+        return std::visit([&ctx](const auto &held) { return format_to(ctx.out(), "{}", held); }, p);
+    }
+};
+
+/** Formats a TriplePattern as "(subject, predicate, object)". */
+template<>
+struct fmt::formatter<Dice::sparql::TriplePattern> {
+    template<typename ParseContext>
+    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+    template<typename FormatContext>
+    auto format(const Dice::sparql::TriplePattern &p, FormatContext &ctx) {
+        return format_to(ctx.out(), "({})", fmt::join(p, ", "));
+    }
+};
+#endif//RDF_PARSER_SPARQL_TRIPLEPATTERNELEMENT_HPP
diff --git a/include/Dice/SPARQL/Variable.hpp b/include/Dice/SPARQL/Variable.hpp
new file mode 100644
index 0000000..08b0ce8
--- /dev/null
+++ b/include/Dice/SPARQL/Variable.hpp
@@ -0,0 +1,87 @@
+#ifndef RDF_PARSER_SPARQL_TRIPLEVARIABLE_HPP
+#define RDF_PARSER_SPARQL_TRIPLEVARIABLE_HPP
+
+// NOTE(review): the header names of the four #include lines below were missing in the
+// reviewed text; they are reconstructed from the names this file uses - confirm.
+#include <string>
+#include <utility>
+
+#include <Dice/hash/DiceHash.hpp>
+#include <fmt/format.h>
+
+namespace Dice::sparql {
+    /**
+     * A SPARQL variable, identified by its name. The anonymous flag only affects how the
+     * variable is printed ("_:name" instead of "?name"); comparisons and the hash ignore it.
+     */
+    class Variable {
+    private:
+        std::string name_;
+        bool is_anonym_ = false;
+
+    public:
+        Variable() = default;
+
+        /**
+         * @param var_name name of the variable, without the leading '?' or "_:"
+         * @param anonym whether the variable is anonymous
+         */
+        explicit Variable(std::string var_name, bool anonym = false) : name_{std::move(var_name)},
+                                                                         is_anonym_{anonym} {}
+
+        // All comparisons look at the name only.
+        inline bool operator==(const Variable &rhs) const {
+            return name_ == rhs.name_;
+        }
+
+        inline bool operator!=(const Variable &rhs) const {
+            return name_ != rhs.name_;
+        }
+
+        inline bool operator<(const Variable &rhs) const {
+            return name_ < rhs.name_;
+        }
+
+        inline bool operator>(const Variable &rhs) const {
+            return name_ > rhs.name_;
+        }
+
+        inline void setName(std::string name) {
+            this->name_ = std::move(name);
+        }
+
+        [[nodiscard]] const std::string &getName() const {
+            return name_;
+        }
+
+        inline void setIs_anonym(bool is_anonym) {
+            this->is_anonym_ = is_anonym;
+        }
+
+        [[nodiscard]] bool isAnon() const {
+            return is_anonym_;
+        }
+
+        /**
+         * @return hash of the name. The anonymous flag is deliberately left out: operator==
+         * ignores it, and objects that compare equal must produce the same hash.
+         * NOTE(review): if anonymous and named variables are meant to be distinct, extend the
+         * comparison operators and this hash together.
+         */
+        [[nodiscard]] std::size_t hash() const noexcept {
+            return Dice::hash::dice_hash(this->name_);
+        }
+    };
+}// namespace Dice::sparql
+
+namespace Dice::hash {
+    template<>
+    inline std::size_t dice_hash(const Dice::sparql::Variable &v) noexcept {
+        return v.hash();
+    }
+}// namespace Dice::hash
+
+template<>
+struct std::hash<Dice::sparql::Variable> {
+    size_t operator()(const Dice::sparql::Variable &v) const {
+        return v.hash();
+    }
+};
+
+/** Formats a Variable as "_:name" if it is anonymous and as "?name" otherwise. */
+template<>
+struct fmt::formatter<Dice::sparql::Variable> {
+    template<typename ParseContext>
+    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+    template<typename FormatContext>
+    auto format(const Dice::sparql::Variable &p, FormatContext &ctx) {
+        if (p.isAnon()) {
+            return format_to(ctx.out(), "_:{}", p.getName());
+        } else {
+            return format_to(ctx.out(), "?{}", p.getName());
+        }
+    }
+};
+#endif//RDF_PARSER_SPARQL_TRIPLEVARIABLE_HPP
diff --git a/include/Dice/rdf-parser/TurtleFileParser.hpp b/include/Dice/rdf-parser/TurtleFileParser.hpp
new file mode 100644
index 0000000..69ef5ac
--- /dev/null
+++
b/include/Dice/rdf-parser/TurtleFileParser.hpp @@ -0,0 +1,151 @@ +#ifndef RDF_PARSER_TURTLEPEGTLCONCURRENTSTREAMPARSER_HPP +#define RDF_PARSER_TURTLEPEGTLCONCURRENTSTREAMPARSER_HPP + +/** + * CuncurrentStreamParser is responsible for parsing Rdfs from stream sources. + * It also creates its own thread for parsing. + * It is also responsible for synchronizing between the parsing thread and the triples queue + * It parse a file as a stream and put the parsed triples increasingly in a tbb::concurrent_bounded_queue + * It is the best choice for very large files or stream sources. + */ + +#include +#include +#include +#include +#include + +#include "Dice/rdf-parser/exception/RDFParsingExecption.hpp" +#include "Dice/rdf-parser/internal/Turtle/Actions/Actions.hpp" +#include "Dice/rdf-parser/internal/Turtle/Configurations.hpp" +#include "Dice/rdf-parser/internal/Turtle/Parsers/AbstractParser.hpp" +#include "Dice/rdf-parser/internal/Turtle/States/ConcurrentState.hpp" +#include "Dice/rdf-parser/internal/exception//InternalError.hpp" +#include "Dice/rdf-parser/internal/util/ScopedThread.hpp" + +namespace Dice::rdf_parser::Turtle::parsers { + + /* + * + */ + class TurtleFileParser : public internal::Turtle::Parsers::AbstractParser { + using Term = Dice::rdf::Term; + using Triple = Dice::rdf::Triple; + + private: + // TODO: we don't need the smart pointers for the members + + boost::lockfree::spsc_queue + parsedTerms{internal::Turtle::Configurations::RdfConcurrentStreamParser_QueueCapacity}; + size_t upperThreshold; + size_t lowerThreshold; + + std::ifstream stream; + std::condition_variable cv; + std::mutex m; + std::condition_variable cv2; + std::mutex m2; + std::atomic_bool termCountWithinThresholds; + std::atomic_bool termsCountIsNotEmpty; + std::atomic_bool parsingIsDone; + std::unique_ptr parsingThread; + + public: + using Iterator = internal::Turtle::Parsers::Iterator; + void startParsing(std::string filename, std::size_t bufferSize) { + namespace Grammar = 
internal::Turtle::Grammar; + namespace States = internal::Turtle::States; + namespace Actions = internal::Turtle::Actions; + try { + + States::ConcurrentState + state(parsedTerms, + upperThreshold, + cv, m, + cv2, m2, + termCountWithinThresholds, + termsCountIsNotEmpty, + parsingIsDone); + tao::pegtl::parse, Actions::action>( + tao::pegtl::istream_input(stream, bufferSize, filename), std::move(state)); + } catch (std::exception &e) { + throw exception::RDFParsingException(); + } + } + + ~TurtleFileParser() override { + stream.close(); + } + + /** + * + * @param filename name of the file to be parsed + * @param queue_capacity maximum number of entries which are cached. When the capacity is reached processing stops. + * @param queue_capacity_lower_threshold after queue_capacity was reach, when queue reached this length, processing starts again. + */ + explicit TurtleFileParser(const std::string &filename, + const size_t queue_capacity = internal::Turtle::Configurations::RdfConcurrentStreamParser_QueueCapacity, + const size_t queue_capacity_lower_threshold = internal::Turtle::Configurations::RdfConcurrentStreamParser_QueueCapacity / 10) + : stream{filename}, + upperThreshold(queue_capacity), + lowerThreshold(queue_capacity_lower_threshold), + cv{}, + m{}, + cv2{}, + m2{}, + termCountWithinThresholds{false}, + termsCountIsNotEmpty{false}, + parsingIsDone{false}, + parsingThread{std::make_unique( + std::thread(&TurtleFileParser::startParsing, this, filename, + internal::Turtle::Configurations::RdfConcurrentStreamParser_BufferSize))} { + if (queue_capacity < queue_capacity_lower_threshold) { + throw std::logic_error{"queue_capacity_lower_threshold must not be larger than queue_capacity."}; + } + } + + + void nextTriple_impl() { + parsedTerms.pop(this->current_triple); + if (parsedTerms.read_available() < lowerThreshold) { + { + std::lock_guard lk(m); + termCountWithinThresholds = true; + } + cv.notify_one(); + } + } + + bool hasNextTriple_impl() { + if 
(parsedTerms.read_available() != 0) { + return true; + } else { + + //check if the parsing is done + if (parsingIsDone) { + return false; + } else { + std::unique_lock lk(m2); + termsCountIsNotEmpty = false; + cv2.wait(lk, [&] { return termsCountIsNotEmpty.load(); }); + + if (parsedTerms.read_available() != 0) + return true; + + else if (parsingIsDone) { + return false; + } else { + throw internal::exception::InternalError(); + } + } + }; + }; + + + internal::Turtle::Parsers::Iterator begin_impl() { + return internal::Turtle::Parsers::Iterator(this); + }; + }; +}// namespace Dice::rdf_parser::Turtle::parsers + +#endif//RDF_PARSER_TURTLEPEGTLCONCURRENTSTREAMPARSER_HPP diff --git a/include/Dice/rdf-parser/TurtleStringParser.hpp b/include/Dice/rdf-parser/TurtleStringParser.hpp new file mode 100644 index 0000000..d24251b --- /dev/null +++ b/include/Dice/rdf-parser/TurtleStringParser.hpp @@ -0,0 +1,64 @@ + +#ifndef RDF_PARSER_TURTLEPEGTLSTRINGPARSER_HPP +#define RDF_PARSER_TURTLEPEGTLSTRINGPARSER_HPP + + +/** + * StringParser is responsible for parsing rdf from string sources. + */ + + +#include +#include + +#include "Dice/rdf-parser/internal/Turtle/Actions/Actions.hpp" +#include "Dice/rdf-parser/internal/Turtle/Parsers/BaseStringParser.hpp" + + +namespace Dice::rdf_parser { + + class TurtleStringParser : public internal::Turtle::Parsers::BaseStringParser { + + + public: + using Iterator [[maybe_unused]] = internal::Turtle::Parsers::Iterator; + + /** + * The constructor start the parsing.if the input is not valid it will throws and exception. + * it also invoke nextTriple to have the first triple ready for using . 
+ * @param text the string to parse + */ + explicit TurtleStringParser(std::string text) : BaseStringParser(std::move(text)) {} + + /** + * checks whether a string is valid rdf turtle file + */ + static bool isParsable(const std::string &input) { + namespace Grammar = internal::Turtle::Grammar; + try { + tao::pegtl::string_input in(input, "the text"); + tao::pegtl::parse>(in); + return true; + } catch (std::exception &e) { + return false; + } + } + + /** + * calculate the time for parsing a rdf turtle string. + * Note that the calculated time is only for parsing without using processing the input(creating and storing the triples out of the string) + */ + static long calculateParsingTime(const std::string &input) { + std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + isParsable(input); + std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(t2 - t1).count(); + return duration; + } + + + ~TurtleStringParser() override = default; + }; +}// namespace Dice::rdf_parser + +#endif//RDF_PARSER_TURTLEPEGTLSTRINGPARSER_HPP diff --git a/include/Dice/rdf-parser/exception/RDFParsingExecption.hpp b/include/Dice/rdf-parser/exception/RDFParsingExecption.hpp new file mode 100644 index 0000000..af774e1 --- /dev/null +++ b/include/Dice/rdf-parser/exception/RDFParsingExecption.hpp @@ -0,0 +1,13 @@ +#ifndef RDF_PARSER_RDFPARSINGEXECPTION_HPP +#define RDF_PARSER_RDFPARSINGEXECPTION_HPP + +namespace Dice::rdf_parser::exception { + class RDFParsingException : public std::exception { + public: + [[nodiscard]] const char *what() const noexcept override { + return "The rdf document can't be parsed"; + } + }; +}// namespace Dice::rdf_parser::exception + +#endif//RDF_PARSER_RDFPARSINGEXECPTION_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/Actions/Actions.hpp b/include/Dice/rdf-parser/internal/Turtle/Actions/Actions.hpp new file mode 100644 index 
0000000..4dd6c8a --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/Actions/Actions.hpp @@ -0,0 +1,127 @@ +#ifndef RDF_PARSER_ACTIONS_HPP +#define RDF_PARSER_ACTIONS_HPP + +#include "Dice/rdf-parser/internal/Turtle/Actions/BasicActions.hpp" +#include "Dice/rdf-parser/internal/Turtle/States/State.hpp" + +/** + * Actions define how to deal with the parsed grammars during the parsing and allow to store information in the states. + * For more information about actions please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md# + * This file contains the actions required for parsing RDF triples in a whole file,stream or string. +*/ +namespace Dice::rdf_parser::internal::Turtle::Actions { + + template<> + struct action { + template + static void apply(const Input &in, States::State &state) { + state.syncWithMainThread(); + } + }; + + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.clearTripleParameters(); + } + }; + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.processTripleSeq(); + } + }; + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + //add the unlabeled blank node from BNPL as subject + state.setSubject(state.getFirst_BNPL()); + state.processTripleSeq(); + } + }; + + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.setSubject(state.getElement()); + } + }; + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.processVerb(); + } + }; + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.pushCurrentTermIntoBnpl_collection_list(); + } + }; + + + template<> + struct action { + template + static void apply(const Input &in, States::State &state) { + 
state.moveBnpl_collection_listIntoStack(); + } + }; + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.processCollection(); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::State &state) { + state.moveBnpl_collection_listIntoStack(); + } + }; + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.processBlankNodePropertyList(); + } + }; + + + template + struct action> { + template + static void apply(const Input &in, States::State &state) { + state.processPredicateObjectListInner(); + } + }; + + + template<> + struct action { + template + static void apply(const Input &in, States::State &state) { + //Here parsingIsDone lock is set to true + state.setParsingIsDone(); + } + }; +}// namespace Dice::rdf_parser::internal::Turtle::Actions + +#endif//RDF_PARSER_ACTIONS_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/Actions/BasicActions.hpp b/include/Dice/rdf-parser/internal/Turtle/Actions/BasicActions.hpp new file mode 100644 index 0000000..6d7f8a1 --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/Actions/BasicActions.hpp @@ -0,0 +1,359 @@ +#ifndef RDF_PARSER_BASICACTIONS_HPP +#define RDF_PARSER_BASICACTIONS_HPP + + +/** +Actions define how to deal with the parsed grammars during the parsing and allow to store information in the states. +For more information about actions please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md# + +This file contains the actions required for parsing RDF term . 
+*/ + + +#include "Dice/SPARQL/Variable.hpp" +#include "Dice/rdf-parser/internal/Turtle/Grammar.hpp" +#include "Dice/rdf-parser/internal/Turtle/States/BasicState.hpp" + +namespace Dice::rdf_parser::internal::Turtle::Actions { + + template + struct action : ::tao::pegtl::nothing {}; + + template<> + struct action : ::tao::pegtl::discard_input {}; + + template + struct action> : tao::pegtl::discard_input {}; + + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string ignore; + std::string value; + + ss >> ignore;//read PREFIX and ignore it + ss >> value; //read the value + value.erase(0, 1); + value.erase(value.length() - 1, 1); + + state.setBase(std::move(value)); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string prefix; + std::string ignore; + std::string value; + + ss >> ignore; //read @prefix and ignore it + ss >> prefix; //read the prefix + prefix.erase(prefix.length() - 1, 1);// erase : at the end of the prefix + ss >> value; //read the value + value.erase(0, 1); + value.erase(value.length() - 1, 1); + + state.addPrefix(prefix, value); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string prefix; + std::string ignore; + std::string value; + + ss >> ignore; //read PREFIX and ignore it + ss >> prefix; //read the prefix + prefix.erase(prefix.length() - 1, 1);// erase : at the end of the prefix + ss >> value; //read the value + value.erase(0, 1); + value.erase(value.length() - 1, 1); + + state.addPrefix(prefix, value); + } + }; + + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string ignore; + 
std::string value; + + ss >> ignore;//read PREFIX and ignore it + ss >> value; //read the value + value.erase(0, 1); + value.erase(value.length() - 1, 1); + + state.setBase(std::move(value)); + } + }; + + template<> + struct action { + using URIRef = Dice::rdf::URIRef; + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string statement; + std::string prefix; + std::string value; + + ss >> statement;//read the whole statement + int pos = statement.find(':'); + prefix = statement.substr(0, pos); + + if (auto mappedPrefix_opt = state.getPrefixValue(prefix); + mappedPrefix_opt.has_value()) { + const std::string &mappedPrefix = mappedPrefix_opt.value(); + state.setElement(URIRef(mappedPrefix + statement.substr(pos + 1, statement.length() - prefix.length()))); + state.setIri_is_IRIREF(false); + } else { + throw std::runtime_error("undefined prefix"); + } + }; + }; + + template<> + struct action { + using URIRef = Dice::rdf::URIRef; + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str().substr(1, ss.str().length() - 2); + //check for @base + if (not state.getBase().empty()) + s = s.insert(1, state.getBase()); + + state.setElement(URIRef(s)); + state.setIri_is_IRIREF(true); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + state.setBlank_node_string(state.createBlankNodeLabel()); + }; + }; + + template<> + struct action { + using Literal = Dice::rdf::Literal; + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str(); + + state.setLiteral_string(std::move(s));// TODO: is that necessary + state.setElement(Literal(state.getLiteral_string(), std::nullopt, "xsd:boolean")); + } + }; + + template<> + struct action { + template + static void 
apply(const Input &in, States::BasicState &state) { + state.setType_tag("xsd:double"); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + state.setType_tag("xsd:decimal"); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + state.setType_tag("xsd:integer"); + } + }; + + template<> + struct action { + using Literal = Dice::rdf::Literal; + + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str(); + + state.setElement(Literal(s, std::nullopt, state.getType_tag())); + } + }; + + template<> + struct action { + using Literal = Dice::rdf::Literal; + + template + static void apply(const Input &in, States::BasicState &state) { + //check if this RdfLiteral has IRI part + if (state.isTypeTagFound()) { + std::string tag; + //set it again to false + state.setType_tag_found(false); + + const std::string &type_tag = state.getType_tag(); + //check if the type tag is iri or PREFIXED NAME and process it accordingly + if (not state.iriIsIRIREF()) { + size_t pos = type_tag.find(':'); + std::string prefix = type_tag.substr(0, pos); + if (auto mappedPrefix_opt = state.getPrefixValue(prefix); + mappedPrefix_opt.has_value()) { + const std::string &mappedPrefix = mappedPrefix_opt.value(); + tag = mappedPrefix + type_tag.substr(pos + 1, type_tag.length()); + } else { + tag = type_tag; + } + } else { + tag = type_tag; + } + state.setElement(Literal{state.getLiteral_string(), std::nullopt, tag}); + } + //check if this RdfLiteral has langTag part + else if (state.isLangTagFound()) { + //set it again to false + state.setLang_tag_found(false); + state.setElement(Literal(state.getLiteral_string(), state.getLang_tag(), std::nullopt)); + } else { + state.setElement(Literal(state.getLiteral_string(), std::nullopt, std::nullopt)); + } + } + }; + + template<> + struct 
action { + template + static void apply(const Input &in, States::BasicState &state) { + + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str(); + + state.setType_tag_found(true); + //check if the iri is IRIREF or a PerfixedName + if (state.iriIsIRIREF()) + // set the Literal type tag without the "^^<" at the beginning amd without ">" at the end . + state.setType_tag(s.substr(3, s.length() - 3 - 1)); + else + state.setType_tag(s.substr(2)); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str().substr(1); + + + state.setLang_tag_found(true); + state.setLan_tag(std::move(s)); + } + }; + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str().substr(1, ss.str().length() - 1 - 1); + state.setLiteral_string(std::move(s)); + } + }; + + + template<> + struct action { + using BNode = Dice::rdf::BNode; + using Variable = Dice::sparql::Variable; + + + template + static void apply(const Input &in, States::BasicState &state) { + if constexpr (not SparqlQuery) + state.setElement(BNode(state.getBlank_node_string())); + else + state.setElement(Variable(state.getBlank_node_string())); + } + }; + + + template<> + struct action { + using URIRef = Dice::rdf::URIRef; + template + static void apply(const Input &in, States::BasicState &state) { + std::string fixedURI = ""; + state.setElement(URIRef(fixedURI)); + }; + }; + + + template<> + struct action { + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str().substr(2); + + state.setLiteral_string(std::move(s));// TODO: is that necessary + state.setBlank_node_string(state.getLiteral_string()); + }; + }; + + template<> + struct action { + using Variable = 
Dice::sparql::Variable; + + template + static void apply(const Input &in, States::BasicState &state) { + std::stringstream ss; + ss << in.string(); + std::string s; + s = ss.str().substr(1, ss.str().length() - 1); + state.setElement(Variable(s)); + ; + } + }; +}// namespace Dice::rdf_parser::internal::Turtle::Actions + +#endif//RDF_PARSER_BASICACTIONS_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/Configurations.hpp b/include/Dice/rdf-parser/internal/Turtle/Configurations.hpp new file mode 100644 index 0000000..dec49f0 --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/Configurations.hpp @@ -0,0 +1,9 @@ +#ifndef RDF_PARSER_CONFIG_HPP +#define RDF_PARSER_CONFIG_HPP + +namespace Dice::rdf_parser::internal::Turtle::Configurations { + constexpr std::size_t RdfConcurrentStreamParser_BufferSize = 1024 * 1024 * 128; + constexpr std::size_t RdfConcurrentStreamParser_QueueCapacity = 100'000; +}// namespace Dice::rdf_parser::internal::Turtle::Configurations + +#endif//RDF_PARSER_CONFIG_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/Grammar.hpp b/include/Dice/rdf-parser/internal/Turtle/Grammar.hpp new file mode 100644 index 0000000..ddc7dad --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/Grammar.hpp @@ -0,0 +1,412 @@ +#ifndef RDF_PARSER_TURTLEPEGTLGRAMMER_HPP +#define RDF_PARSER_TURTLEPEGTLGRAMMER_HPP + +/** +All grammars for turtle are defined here.PEGTL is used for parsing. 
+For more information about how to define grammars and rules using PEGTl please check "https://github.com/taocpp/PEGTL/blob/master/doc/Rules-and-Grammars.md" +*/ + + +#include + +namespace Dice::rdf_parser::internal::Turtle::Grammar { + using namespace tao::pegtl; + + // Terminals + + struct PN_LOCAL_ESC + : seq, + sor, + one<'~'>, + one<'.'>, + one<'-'>, + one<'!'>, + one<'$'>, + one<'&'>, + one<'\''>, + one<'('>, + one<')'>, + one<'*'>, + one<'+'>, + one<','>, + one<';'>, + one<'='>, + one<'/'>, + one<'?'>, + one<'#'>, + one<'@'>, + one<'%'>>> { + }; + + struct HEX : + //ToDo + ranges<'a', 'f', 'A', 'F', '0', '9'> { + }; + + struct PERCENT : seq, HEX, HEX> { + }; + + struct PLX : sor { + }; + + struct PN_CHARS_BASE + : sor> { + }; + + struct PN_CHARS_U : sor> { + }; + + struct PN_CHARS + : sor, + digit, + utf8::one<0x00B7>, + utf8::range<0x0300, 0x036F>, + utf8::range<0x203F, 0x2040>> { + }; + + struct PN_LOCAL + : seq< + sor< + PN_CHARS_U, + one<':'>, + digit, + PLX>, + star< + star< + one<'.'>>, + plus< + sor< + PN_CHARS, + one<':'>, + PLX>>>> { + }; + + struct PN_PREFIX : seq< + PN_CHARS_BASE, + star< + star>, + PN_CHARS>> { + }; + + struct ANON_WS : one<' ', '\t', '\r', '\n'> { + }; + + struct ANON : seq, star, one<']'>> { + }; + + struct UCHAR + : sor< + seq, one<'u'>, HEX, HEX, HEX, HEX>, + seq, one<'U'>, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX>> { + }; + + struct ECHAR + : seq, sor, one<'b'>, one<'n'>, one<'r'>, one<'f'>, one<'"'>, one<'\''>, one<'\\'>>> { + }; + + struct STRING_LITERAL_LONG_QUOTE : seq< + one<'\"'>, one<'\"'>, one<'\"'>, + star< + opt< + sor< + seq, one<'\"'>>, + one<'\"'>>>, + sor, ECHAR, UCHAR>>, + one<'\"'>, one<'\"'>, one<'\"'>> { + }; + + struct STRING_LITERAL_QUOTE + : seq< + one<'\"'>, + star< + sor< + not_one<'\"', '\\', '\r', '\n'>, + ECHAR, + UCHAR> + + >, + one<'\"'>> { + }; + + struct STRING_LITERAL_SINGLE_QUOTE + : seq< + one<'\''>, + star< + sor< + not_one<'\'', '\\', '\r', '\n'>, + ECHAR, + UCHAR> + + >, + one<'\''>> { + }; 
+ + struct STRING_LITERAL_LONG_SINGLE_QUOTE + : seq< + one<'\''>, one<'\''>, one<'\''>, + star< + opt< + sor< + seq, one<'\''>>, + one<'\''>>>, + sor, ECHAR, UCHAR>>, + one<'\''>, one<'\''>, one<'\''>> { + }; + + struct INTEGER : seq, one<'-'>>>, plus> { + }; + + struct DECIMAL : seq, one<'-'>>>, star, one<'.'>, plus> { + }; + + struct EXPONENT + : seq, one<'E'>>, opt, one<'-'>>>, plus> { + }; + + struct DOUBLE + : seq, one<'-'>>>, sor< + seq, one<'.'>, star, EXPONENT>, + seq, plus, EXPONENT>, + seq, EXPONENT>>> { + }; + + struct LANGTAG : seq, plus, star, plus>>> { + }; + + struct BLANK_NODE_LABEL : seq< + one<'_'>, + one<':'>, + sor< + PN_CHARS_U, + digit>, + star< + star>, + PN_CHARS>> { + }; + + struct PNAME_NS : seq, one<':'>> { + }; + + struct PNAME_LN : seq { + }; + + struct IRIREF : seq< + one<'<'>, + star', '"', '{', '}', '|', '^', '`', '\\'>>, tao::pegtl::range<0x00, 0x20>>, + UCHAR>>, + one<'>'>> { + }; + + struct comment : seq< + one<'#'>, + star>, + one<'\n', '\r'>> { + }; + + + // Grammar rules + + + struct ignored : star> { + }; + + struct NumericLiteral : sor { + }; + + struct turtleString + : sor { + }; + + struct BlankNode : sor { + }; + + struct PrefixedName : sor { + }; + + struct iri : sor { + }; + + struct prefixID : seq, ignored, PNAME_NS, ignored, IRIREF, ignored, one<'.'>> { + }; + + struct sparqlPrefix : seq, + one<'R', 'r'>, + one<'E', 'e'>, + one<'F', 'f'>, + one<'I', 'i'>, + one<'X', 'x'>, + ignored, + PNAME_NS, + ignored, + IRIREF> { + }; + + struct base : seq, ignored, IRIREF, ignored, one<'.'>> { + }; + + struct sparqlBase : seq, one<'A', 'a'>, one<'S', 's'>, one<'E', 'e'>, ignored, IRIREF> { + }; + + struct directive : sor { + }; + + struct RdfLiteralTypeTag : seq, iri> { + }; + + struct RdfLiteral : seq>> { + }; + + struct BooleanLiteral : sor, string<'f', 'a', 'l', 's', 'e'>> { + }; + + struct literal : sor { + }; + + + struct term : sor { + }; + + //The following grammars are not part of turtle.they are part of sparql 
language.but used here to make it + //possible to parse some sparql grammar which has very similar context to turtle. + + struct varname : seq< + sor, + star, utf8::range<0x0300, 0x036F>, utf8::range<0x203F, 0x2040>>>> { + }; + + struct var1 : seq, varname> { + }; + + struct var2 : seq, varname> { + }; + + struct var : sor { + }; + + struct varOrTerm : sor {}; + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////// + + template + struct collection; + template + struct blankNodePropertyList; + template + struct object : std::conditional_t, blankNodePropertyList>, + sor, blankNodePropertyList>> {}; + + template + struct objectList : seq, star, ignored, object>>> { + }; + + struct predicate : iri { + }; + + struct collectionBegin : one<'('> { + }; + + template + struct collection : seq>>, + ignored, + one<')'>> {}; + + struct verb_a : one<'a'> { + }; + + template + struct verb : sor, predicate>, verb_a> { + }; + + template + struct predicateObjectListInner : seq, ignored, objectList> { + }; + + template + struct predicateObjectList : seq< + predicateObjectListInner, + star< + seq< + ignored, + one<';'>, + opt< + seq< + ignored, + predicateObjectListInner>>>>> { + }; + + struct blankNodePropertyListBegin : one<'['> { + }; + + template + struct blankNodePropertyList + : seq, ignored, one<']'>> { + }; + + template + struct subject : std::conditional_t>> {}; + + template + struct tripleSeq1 : seq< + subject, + ignored, + predicateObjectList> { + }; + + template + struct tripleSeq2 : seq< + blankNodePropertyList, + ignored, + opt>> { + }; + + template + struct triple : sor< + tripleSeq1, tripleSeq2> { + }; + + + template + struct tripleExtended : seq< + triple, ignored, one<'.'>> { + }; + + struct statement : sor< + tripleExtended, + directive> { + }; + + + struct statementsCollection : sor>, seq> { + }; + + + struct turtleDoc : seq< + plus< + seq>, + ignored> { + }; + + + struct triplesBlock : seq< + plus< + seq>>, 
+ ignored> { + }; + + + template + struct grammar : std::conditional_t, + sor< + must, + seq>> {}; + + +}// namespace Dice::rdf_parser::internal::Turtle::Grammar + +#endif//RDF_PARSER_TURTLEPEGTLGRAMMER_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/Parsers/AbstractParser.hpp b/include/Dice/rdf-parser/internal/Turtle/Parsers/AbstractParser.hpp new file mode 100644 index 0000000..abaac0d --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/Parsers/AbstractParser.hpp @@ -0,0 +1,108 @@ +#ifndef RDF_PARSER_ABSTRACTPARSER_HPP +#define RDF_PARSER_ABSTRACTPARSER_HPP + +#include "Dice/RDF/Triple.hpp" +#include "Dice/SPARQL/TriplePattern.hpp" + + +/** + * Base class for parsing triples from different sources. + */ + +namespace Dice::rdf_parser::internal::Turtle::Parsers { + + template + class Iterator; + + template + class AbstractParser { + using Term = Dice::rdf::Term; + using VarOrTerm = Dice::sparql::VarOrTerm; + using Triple = Dice::rdf::Triple; + using TriplePattern = Dice::sparql::TriplePattern; + using Triple_t = std::conditional_t; + + protected: + using element_type = Triple_t; + + explicit AbstractParser() = default; + + element_type current_triple; + + + public: + /** + * process to the next parsed triple. 
+ */ + void nextTriple() { + static_cast(this)->nextTriple_impl(); + } + + /** + * check whether there is a further triple + */ + [[nodiscard]] virtual bool hasNextTriple() { + return static_cast(this)->hasNextTriple_impl(); + } + + /** + * get the current triple + */ + const element_type &getCurrentTriple() { + return current_triple; + } + + + virtual ~AbstractParser() = default; + + + Iterator begin() { + return static_cast(this)->begin_impl(); + } + + bool end() { return false; } + }; + + template + class Iterator { + using Term = Dice::rdf::Term; + using Triple = Dice::rdf::Triple; + using TriplePattern = Dice::sparql::TriplePattern; + using Triple_t = std::conditional_t; + + private: + bool done_; + bool parser_done_; + Parser *triplesParser = nullptr; + + public: + explicit Iterator(Parser *triplesParser) : done_{false}, parser_done_{false}, triplesParser{triplesParser} { + //check if there is at least one parsed triple + if (triplesParser->hasNextTriple()) + this->operator++(); + else + parser_done_ = true; + }; + + void operator++() { + if (parser_done_) { + done_ = true; + } else { + triplesParser->nextTriple(); + if (not triplesParser->hasNextTriple()) + parser_done_ = true; + } + } + + void operator++(int) { operator++(); } + + operator bool() { return not done_; } + + const Triple_t &operator*() { return triplesParser->getCurrentTriple(); } + }; + + +};// namespace Dice::rdf_parser::internal::Turtle::Parsers + + +#endif//RDF_PARSER_ABSTRACTPARSER_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/Parsers/BaseStringParser.hpp b/include/Dice/rdf-parser/internal/Turtle/Parsers/BaseStringParser.hpp new file mode 100644 index 0000000..a1e9f47 --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/Parsers/BaseStringParser.hpp @@ -0,0 +1,90 @@ +#ifndef RDF_PARSER_BASESTRINGPARSER_HPP +#define RDF_PARSER_BASESTRINGPARSER_HPP + + +#include + +#include + +#include "Dice/rdf-parser/exception/RDFParsingExecption.hpp" +#include 
"Dice/rdf-parser/internal/Turtle/Actions/Actions.hpp" +#include "Dice/rdf-parser/internal/Turtle/Parsers/AbstractParser.hpp" +#include "Dice/rdf-parser/internal/Turtle/States/SequentialState.hpp" + +/** + * BaseStringParser is a base class for parsing string sources . + * It parse the string one time and put the parsed elements in a std::queue + */ + +namespace Dice::rdf_parser::internal::Turtle::Parsers { + + template + class BaseStringParser : public AbstractParser, sparqlQuery> { + + using Triple = Dice::rdf::Triple; + using TriplePattern = Dice::sparql::TriplePattern; + using Triple_t = std::conditional_t; + + protected: + /** + * a queue for storing parsed triples . + */ + std::queue parsedTerms; + + protected: + /** + * The constructor start the parsing.if the input is not valid it will throws and exception. + * it also invoke nextTriple to have the first triple ready for using . + * @param text the string to parse + */ + explicit BaseStringParser(std::string text) { + try { + tao::pegtl::string_input input(std::move(text), "the text"); + States::SequentialState state(parsedTerms); + tao::pegtl::parse, Actions::action>(input, state); + + } catch (std::exception &e) { + throw exception::RDFParsingException(); + } + } + + /** + * The constructor start the parsing.if the input is not valid it will throws and exception. + * it also invoke nextTriple to have the first triple ready for using . 
+ * @param text the string to parse + * @param prefix_map defines prefixes to be added before parsing + */ + BaseStringParser(std::string text, const robin_hood::unordered_map &prefix_map) { + try { + tao::pegtl::string_input input(text, "the text"); + States::SequentialState state(parsedTerms); + for (auto pair : prefix_map) + state.addPrefix(pair.first, pair.second); + tao::pegtl::parse, Actions::action>(input, state); + + } catch (std::exception &e) { + throw exception::RDFParsingException(); + } + } + + public: + [[nodiscard]] bool hasNextTriple_impl() const { + return not parsedTerms.empty(); + } + + ~BaseStringParser() override = default; + + void nextTriple_impl() { + this->current_triple = parsedTerms.front(); + parsedTerms.pop(); + } + + + Iterator begin_impl() { + return Iterator(this); + } + }; +}// namespace Dice::rdf_parser::internal::Turtle::Parsers + + +#endif//RDF_PARSER_BASESTRINGPARSER_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/States/BasicState.hpp b/include/Dice/rdf-parser/internal/Turtle/States/BasicState.hpp new file mode 100644 index 0000000..17a07ec --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/States/BasicState.hpp @@ -0,0 +1,144 @@ +#ifndef RDF_PARSER_BASICSTATE_HPP +#define RDF_PARSER_BASICSTATE_HPP + + +#include +#include +#include + +#include + +#include "Dice/RDF/Term.hpp" +#include "Dice/RDF/Triple.hpp" +#include "Dice/SPARQL/TriplePattern.hpp" + + +/** + * States store information needed during and after the parsing. 
+ * For more information about states please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md#states + */ +namespace Dice::rdf_parser::internal::Turtle::States { + + /* + * BasicState defines the data structures related to rdf term + */ + + + template + class BasicState { + public: + BasicState() : element_{std::make_shared()} { + } + + using Term = Dice::rdf::Term; + using VarOrTerm = Dice::sparql::VarOrTerm; + using Triple = Dice::rdf::Triple; + using Element_t = std::conditional_t; + + protected: + std::shared_ptr element_; + + //variables to deal with type and lan tags in literals + bool type_tag_found = false; + bool lang_tag_found = false; + bool iri_is_IRIREF = false; + std::string lang_tag_; + std::string type_tag_; + std::string literal_string_; + std::string blank_node_string_; + + //dealing with base directives + std::string base_; + + int latest_BN_label = 1; + + // todo: use something optimized + robin_hood::unordered_map prefix_map; + + + public: + inline Element_t &getElement() { return *element_; } + + inline void setElement(Element_t element) { *(this->element_) = std::move(element); } + + + inline void addPrefix(std::string prefix, std::string value) { + prefix_map.emplace(std::pair(std::move(prefix), std::move(value))); + } + + inline void setLan_tag(std::string lan_tag) { this->lang_tag_ = std::move(lan_tag); } + + inline void setType_tag(std::string type_tag) { this->type_tag_ = std::move(type_tag); } + + + [[nodiscard]] bool isTypeTagFound() const { + return type_tag_found; + } + + inline void setType_tag_found(bool found) { + this->type_tag_found = found; + } + + + [[nodiscard]] bool isLangTagFound() const { + return lang_tag_found; + } + + inline void setLang_tag_found(bool found) { + this->lang_tag_found = found; + } + + inline void setIri_is_IRIREF(bool found) { + this->iri_is_IRIREF = found; + } + + const std::string &getType_tag() { + return type_tag_; + } + + const std::string &getLang_tag() { + return 
lang_tag_; + } + + bool iriIsIRIREF() { + return iri_is_IRIREF; + } + + const std::string &getBlank_node_string() { + return blank_node_string_; + } + + const std::string &getLiteral_string() { + return literal_string_; + } + + inline void setLiteral_string(std::string literal_string) { this->literal_string_ = std::move(literal_string); } + + inline void setBlank_node_string(std::string blank_node_string) { + this->blank_node_string_ = std::move(blank_node_string); + } + + inline void setBase(std::string base) { this->base_ = std::move(base); } + + const std::string &getBase() { return base_; } + + // create a unique label for a BlankNode + std::string createBlankNodeLabel() { + return fmt::format("b{}", latest_BN_label++); + } + + [[nodiscard]] inline std::optional> + getPrefixValue(const std::string &prefix) const { + if (auto prefix_iter = prefix_map.find(prefix); + prefix_iter != prefix_map.end()) { + const auto &mappedPrefix = prefix_iter->second; + return std::optional>{mappedPrefix}; + } else { + return std::nullopt; + } + } + }; +}// namespace Dice::rdf_parser::internal::Turtle::States + + +#endif//RDF_PARSER_BASICSTATE_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/States/ConcurrentState.hpp b/include/Dice/rdf-parser/internal/Turtle/States/ConcurrentState.hpp new file mode 100644 index 0000000..7763ff6 --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/States/ConcurrentState.hpp @@ -0,0 +1,101 @@ +#ifndef RDF_PARSER_CONCURRENTSTATE_HPP +#define RDF_PARSER_CONCURRENTSTATE_HPP + +/** +States store information needed during and after the parsing. 
+For more information about states please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md#states + +*/ + +#include +#include +#include +#include +#include + +#include + +#include "Dice/rdf-parser/internal/Turtle/Configurations.hpp" +#include "Dice/rdf-parser/internal/Turtle/States/State.hpp" + +namespace Dice::rdf_parser::internal::Turtle::States { + + /* + * ConcurrentState deal with the logic of Concurrent parsing (already parsed triples can be accessed during the parsing). + */ + template + class ConcurrentState : public State> { + + using Term = Dice::rdf::Term; + using Triple = Dice::rdf::Triple; + using TriplePattern = Dice::sparql::TriplePattern; + using Triple_t = std::conditional_t; + + private: + //Defines threshold for triples in the Queue(should be assigned by the constructor) + size_t upperThreshold; + std::condition_variable &cv; + std::mutex &m; + std::condition_variable &cv2; + std::mutex &m2; + std::atomic_bool &termCountWithinThresholds; + std::atomic_bool &termsCountIsNotEmpty; + std::atomic_bool &parsingIsDone; + + boost::lockfree::spsc_queue &parsed_elements; + + public: + explicit ConcurrentState( + boost::lockfree::spsc_queue &parsingQueue, + size_t upperThreshold, + std::condition_variable &cv, std::mutex &m, + std::condition_variable &cv2, std::mutex &m2, + std::atomic_bool &termCountWithinThresholds, + std::atomic_bool &termsCountIsNotEmpty, + std::atomic_bool &parsingIsDone) + : parsed_elements(parsingQueue), + upperThreshold(upperThreshold), + cv(cv), m(m), + cv2(cv2), m2(m2), + termCountWithinThresholds(termCountWithinThresholds), + termsCountIsNotEmpty(termsCountIsNotEmpty), + parsingIsDone(parsingIsDone) {} + + inline void syncWithMainThread_impl() { + if (this->parsed_elements.read_available() > upperThreshold) { + std::unique_lock lk(m); + termCountWithinThresholds = false; + //set the parsing thread to sleep + cv.wait(lk, [&] { return termCountWithinThresholds.load(); }); + //the parsing thread wake from 
sleeping + } + } + + inline void insertTriple_impl(Triple_t triple) { + if (not termsCountIsNotEmpty) { + { + std::lock_guard lk(m2); + termsCountIsNotEmpty = true; + } + this->parsed_elements.push(std::move(triple)); + cv2.notify_one(); + } else { + this->parsed_elements.push(std::move(triple)); + } + } + + + void setParsingIsDone_impl() { + if (not termsCountIsNotEmpty) { + { + std::lock_guard lk(m2); + termsCountIsNotEmpty = true; + } + cv2.notify_one(); + } + parsingIsDone = true; + } + }; + +}// namespace Dice::rdf_parser::internal::Turtle::States +#endif//RDF_PARSER_CONCURRENTSTATE_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/States/SequentialState.hpp b/include/Dice/rdf-parser/internal/Turtle/States/SequentialState.hpp new file mode 100644 index 0000000..bdfd57a --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/States/SequentialState.hpp @@ -0,0 +1,49 @@ +// +// Created by fakhr on 14.01.21. +// + +#ifndef RDF_PARSER_SEQUENTIALSTATE_HPP +#define RDF_PARSER_SEQUENTIALSTATE_HPP + + +/** +States store information needed during and after the parsing. +For more information about states please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md#states + +*/ + + +#include "Dice/rdf-parser/internal/Turtle/States/State.hpp" + + +namespace Dice::rdf_parser::internal::Turtle::States { + + /* + * SequentialState deal with the logic of Sequential parsing (parsed triples cant be accessed until all the file or the string is parsed). 
+ */ + template + class SequentialState : public State> { + using Term = Dice::rdf::Term; + using Triple = Dice::rdf::Triple; + using TriplePattern = Dice::sparql::TriplePattern; + using Triple_t = std::conditional_t; + + private: + std::queue &parsed_elements; + + public: + explicit SequentialState(std::queue &parsingQueue) : parsed_elements(parsingQueue){}; + + inline void syncWithMainThread_impl() { + } + + inline void insertTriple_impl(Triple_t triple) { + this->parsed_elements.push(std::move(triple)); + } + + void setParsingIsDone_impl() { + } + }; +}// namespace Dice::rdf_parser::internal::Turtle::States + +#endif//RDF_PARSER_SEQUENTIALSTATE_HPP diff --git a/include/Dice/rdf-parser/internal/Turtle/States/State.hpp b/include/Dice/rdf-parser/internal/Turtle/States/State.hpp new file mode 100644 index 0000000..826be03 --- /dev/null +++ b/include/Dice/rdf-parser/internal/Turtle/States/State.hpp @@ -0,0 +1,200 @@ +#ifndef RDF_PARSER_STATE_HPP +#define RDF_PARSER_STATE_HPP + +/** +States store information needed during and after the parsing. +For more information about states please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md#states + +*/ + +#include +#include + +#include "Dice/RDF/Triple.hpp" +#include "Dice/SPARQL/TriplePattern.hpp" +#include "Dice/rdf-parser/internal/Turtle/States/BasicState.hpp" + + +namespace Dice::rdf_parser::internal::Turtle::States { + + /* + * State defines the base data structures related to the whole grammar (stores the parsed triples). 
It is a base class for SequentialState and ConcurrentState + */ + + + template + class State : public BasicState { + + using Term = Dice::rdf::Term; + using URIRef = Dice::rdf::URIRef; + using BNode = Dice::rdf::BNode; + using VarOrTerm = Dice::sparql::VarOrTerm; + using Triple = Dice::rdf::Triple; + using TriplePattern = Dice::sparql::TriplePattern; + using Element_t = std::conditional_t; + using Triple_t = std::conditional_t; + + protected: + State() = default; + + /** + * Blank Node Property List + */ + using BnplCollectionList = std::vector; + using VerbObjectPair = std::pair; + + + //we use this to solve the case when 2 verbs are pushed into the stack without a pop between + //to solve the PredicateObjectList recursive problem. + std::stack verb_stack; + int verb_stack_one_step_pre_size = 0; + int verb_stack_two_step_pre_size = 0; + + Element_t subject_; + + + //deal with multi terms and nesting + BnplCollectionList bnpl_collection_list; + //stack for dealing with collections + std::stack bnpl_collection_list_stack; + + std::vector verb_object_pair_list; + std::stack> verb_object_pair_list_stack; + //to deal with the case when there are BNPL + optional predicateObjectList + Element_t first_BNPL; + + + public: + inline void syncWithMainThread() { static_cast(this)->syncWithMainThread_impl(); }; + + inline void insertTriple(Triple_t triple) { return static_cast(this)->insertTriple_impl(triple); }; + + void setParsingIsDone() { static_cast(this)->setParsingIsDone_impl(); }; + + + inline void clearTripleParameters() { + verb_stack_two_step_pre_size = 0; + verb_stack_one_step_pre_size = 0; + bnpl_collection_list.clear(); + verb_object_pair_list.clear(); + bnpl_collection_list.clear(); + } + + + inline void setSubject(Element_t subject) { + this->subject_ = std::move(subject); + } + + + void processVerb() { + verb_stack_two_step_pre_size = verb_stack_one_step_pre_size; + verb_stack_one_step_pre_size = verb_stack.size(); + verb_stack.push(this->getElement()); + // + 
if (verb_stack.size() == verb_stack_two_step_pre_size + 2) { + verb_object_pair_list_stack.push(verb_object_pair_list); + verb_object_pair_list.clear(); + } + } + + void processCollection() { + std::vector LocalParsedTerms; + URIRef first("rdf:first"); + URIRef rest("rdf:rest"); + URIRef nil("rdf:nil"); + if (bnpl_collection_list.size() == 0) { + BNode unlabeledNode(this->createBlankNodeLabel()); + Triple_t triple(unlabeledNode, first, nil); + LocalParsedTerms.push_back(triple); + *(this->element_) = unlabeledNode; + } else { + bool lastElement = true; + for (auto object = bnpl_collection_list.rbegin(); + object != bnpl_collection_list.rend(); object++) { + BNode unlabeledNode(this->createBlankNodeLabel()); + Triple_t triple1; + triple1.setSubject(unlabeledNode); + triple1.setPredicate(rest); + + Triple_t triple2(unlabeledNode, first, *object); + //case 1 :last element_ : + if (lastElement) { + lastElement = false; + triple1.setObject(nil); + + } else { + triple1.setObject((LocalParsedTerms[LocalParsedTerms.size() - 1]).subject()); + } + *(this->element_) = unlabeledNode; + LocalParsedTerms.push_back(triple1); + LocalParsedTerms.push_back(triple2); + } + } + for (auto triple : LocalParsedTerms) + insertTriple(triple); + bnpl_collection_list.clear(); + bnpl_collection_list = bnpl_collection_list_stack.top(); + bnpl_collection_list_stack.pop(); + } + + void processBlankNodePropertyList() { + //create new Blank Node as subject + BNode unlabeledNode(this->createBlankNodeLabel()); + //add the the unlabeledNode to object list + *(this->element_) = unlabeledNode; + auto verbObjectPairList = verb_object_pair_list; + //go through all the VerbObject pairs and make triples out of them with the unlabeled subject + for (auto &pair : verbObjectPairList) { + Triple_t triple(unlabeledNode, pair.first, pair.second); + insertTriple(triple); + } + verb_object_pair_list.clear(); + + + if (!verb_object_pair_list_stack.empty()) { + verb_object_pair_list = 
verb_object_pair_list_stack.top(); + verb_object_pair_list_stack.pop(); + } else { + first_BNPL = unlabeledNode; + } + bnpl_collection_list = bnpl_collection_list_stack.top(); + bnpl_collection_list_stack.pop(); + } + + void processPredicateObjectListInner() { + auto verb = verb_stack.top(); + verb_stack.pop(); + for (auto &object : bnpl_collection_list) { + verb_object_pair_list.push_back(std::make_pair<>(verb, object)); + } + bnpl_collection_list.clear(); + } + + + inline void processTripleSeq() { + //add the subject to each pair in verbObjectsList and create a triple out of that + for (auto &pair : verb_object_pair_list) { + Triple_t triple(subject_, pair.first, pair.second); + //add the created triple into the store + insertTriple(triple); + } + } + + inline void moveBnpl_collection_listIntoStack() { + bnpl_collection_list_stack.push(bnpl_collection_list); + bnpl_collection_list.clear(); + } + + inline void pushCurrentTermIntoBnpl_collection_list() { + bnpl_collection_list.push_back(this->getElement()); + } + + + Element_t &getFirst_BNPL() { + return first_BNPL; + } + }; +}// namespace Dice::rdf_parser::internal::Turtle::States + + +#endif//RDF_PARSER_STATE_HPP diff --git a/include/Dice/rdf-parser/internal/exception/InternalError.hpp b/include/Dice/rdf-parser/internal/exception/InternalError.hpp new file mode 100644 index 0000000..7ab0d2f --- /dev/null +++ b/include/Dice/rdf-parser/internal/exception/InternalError.hpp @@ -0,0 +1,14 @@ +#ifndef RDF_PARSER_INTERNALERROR_HPP +#define RDF_PARSER_INTERNALERROR_HPP + +namespace Dice::rdf_parser::internal::exception { + class InternalError : public std::exception { + public: + [[nodiscard]] const char *what() const noexcept override { + return "Internal error. This should not happen. 
Contact the maintainer"; + } + }; +}// namespace Dice::rdf_parser::internal::exception + + +#endif//RDF_PARSER_INTERNALERROR_HPP diff --git a/include/Dice/rdf-parser/internal/util/ScopedThread.hpp b/include/Dice/rdf-parser/internal/util/ScopedThread.hpp new file mode 100644 index 0000000..f03f9c0 --- /dev/null +++ b/include/Dice/rdf-parser/internal/util/ScopedThread.hpp @@ -0,0 +1,26 @@ +#ifndef PEGTL_RDF_PARSER_SCOPED_THREAD_HPP +#define PEGTL_RDF_PARSER_SCOPED_THREAD_HPP + +#include +#include + +namespace Dice::rdf_parser::internal::util { + + class ScopedThread { + std::thread t; + + public: + explicit ScopedThread(std::thread t_) : t(std::move(t_)) { + if (!t.joinable()) throw std::logic_error("No thread"); + } + + ~ScopedThread() { + t.join(); + } + + ScopedThread(ScopedThread &) = delete; + + ScopedThread &operator=(ScopedThread const &) = delete; + }; +}// namespace Dice::rdf_parser::internal::util +#endif//PEGTL_RDF_PARSER_SCOPED_THREAD_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp b/include/Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp deleted file mode 100644 index b2635a7..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp +++ /dev/null @@ -1,146 +0,0 @@ -#ifndef RDF_PARSER_ACTIONS_HPP -#define RDF_PARSER_ACTIONS_HPP - -#include "BasicActions.hpp" -#include "Dice/rdf_parser/Parser/Turtle/States/State.hpp" - -/** -Actions define how to deal with the parsed grammers during the parsing and allow to store information in the states. -For more information about actions please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md# - -This file contains the actions required for pasrsing RDF triples in a whole file,stream or string. 
-*/ - - -namespace rdf_parser::Turtle { - namespace Actions { - - template<> - struct action { - template - static void apply(const Input &in, States::State &state) { - - state.syncWithMainThread(); - - } - }; - - - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - state.clearTripleParameters(); - - } - }; - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - - state.proccessTripleSeq(); - - } - }; - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - - //add the unlabeled blank node from BNPL as subject - state.setSubject(state.getFirst_BNPL()); - - state.proccessTripleSeq(); - - } - }; - - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - state.setSubject(state.getElement()); - } - }; - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - state.proccessVerb(); - } - }; - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - state.pushCurrentTermIntoBnpl_collection_list(); - } - }; - - - template<> - struct action { - template - static void apply(const Input &in, States::State &state) { - state.moveBnpl_collection_listIntoStack(); - } - }; - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - state.proccessCollection(); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::State &state) { - state.moveBnpl_collection_listIntoStack(); - } - }; - - template - struct action> { - template - static void apply(const Input &in, States::State &state) { - state.proccessBlankNodePropertyList(); - } - }; - - - template - struct action > { - template - static void apply(const Input &in, States::State &state) { - state.proccessPredicateObjectListInner(); - } - }; - - - 
template<> - struct action { - template - static void apply(const Input &in, States::State &state) { - //Here parsingIsDone lock is set to true - state.setPasrsingIsDone(); - } - }; - - - - - - } -} - -#endif //RDF_PARSER_ACTIONS_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Actions/BasicActions.hpp b/include/Dice/rdf_parser/Parser/Turtle/Actions/BasicActions.hpp deleted file mode 100644 index 19b60c8..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Actions/BasicActions.hpp +++ /dev/null @@ -1,367 +0,0 @@ -#ifndef RDF_PARSER_BASICACTIONS_HPP -#define RDF_PARSER_BASICACTIONS_HPP - - -/** -Actions define how to deal with the parsed grammers during the parsing and allow to store information in the states. -For more information about actions please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md# - -This file contains the actions required for pasrsing RDF term . -*/ - - -#include "Dice/rdf_parser/Parser/Turtle/Grammer.hpp" -#include "Dice/rdf_parser/Parser/Turtle/States/BasicState.hpp" - -namespace { - using namespace tao::pegtl; -} - -namespace rdf_parser::Turtle { - namespace Actions { - - template - struct action - : nothing { - }; - - - template<> - struct action : tao::pegtl::discard_input { - }; - - template - struct action> : tao::pegtl::discard_input { - }; - - - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - //std::cout << in.string(); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - //std::cout << in.string(); - } - }; - - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - //std::cout << "PN_LOCAL" << in.string() << std::endl; - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string ignore; - std::string 
value; - - ss >> ignore; //read PREFIX and ignore it - ss >> value; //read the value - value.erase(0, 1); - value.erase(value.length() - 1, 1); - - state.setBase(value); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string prefix; - std::string ignore; - std::string value; - - ss >> ignore; //read @prefix and ignore it - ss >> prefix; //read the prefix - prefix.erase(prefix.length() - 1, 1); // erase : at the end of the prefix - ss >> value; //read the value - value.erase(0, 1); - value.erase(value.length() - 1, 1); - - state.addPrefix(prefix, value); - - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string prefix; - std::string ignore; - std::string value; - - ss >> ignore; //read PREFIX and ignore it - ss >> prefix; //read the prefix - prefix.erase(prefix.length() - 1, 1); // erase : at the end of the prefix - ss >> value; //read the value - value.erase(0, 1); - value.erase(value.length() - 1, 1); - - state.addPrefix(prefix, value); - - } - }; - - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string ignore; - std::string value; - - ss >> ignore; //read PREFIX and ignore it - ss >> value; //read the value - value.erase(0, 1); - value.erase(value.length() - 1, 1); - - state.setBase(value); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string statement; - std::string prefix; - std::string value; - - ss >> statement; //read the whole statement - int pos = statement.find(':'); - prefix = statement.substr(0, pos); - value = statement.substr(pos + 1, statement.length() - prefix.length()); - - 
- if (state.hasPrefix(prefix)) { - std::string mappedPrefix = state.getPrefixValue(prefix); - value = mappedPrefix +"/"+value; - state.setElement(URIRef(value)); - state.setIri_is_IRIREF(false); - } else { - throw std::runtime_error("undefined prefix"); - } - }; - - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string s; - s = ss.str().substr(1, ss.str().length() - 2); - //check for @base - if (not state.getBase().empty()) - s = s.insert(1, state.getBase()); - - state.setElement(URIRef(s)); - state.setIri_is_IRIREF(true); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - state.setBlank_node_string(state.createBlankNodeLabel()); - }; - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string s; - s = ss.str(); - - state.setLiteral_string(s); - state.setElement(Literal(s, std::nullopt, "xsd:boolean")); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - state.setType_tag("xsd:double"); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - state.setType_tag("xsd:decimal"); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - state.setType_tag("xsd:integer"); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string s; - s = ss.str(); - - state.setElement(Literal(s, std::nullopt, state.getType_tag())); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - state.proccessRdfLiteral(); - } 
- }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - - std::stringstream ss; - ss << in.string(); - std::string s; - s = ss.str(); - - state.setType_tag_found(true); - //check if the iri is IRIREF or a PerfixedName - if (state.iriIsIRIREF()) - // set the Literal type tag without the "^^<" at the beginning amd without ">" at the end . - state.setType_tag(s.substr(3, s.length() - 3 - 1)); - else - state.setType_tag(s.substr(2)); - - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string s; - s = ss.str().substr(1); - - - state.setLang_tag_found(true); - state.setLan_tag(s); - } - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string s; - s = ss.str().substr(1, ss.str().length() - 1 - 1); - state.setLiteral_string(s); - - } - }; - - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - state.setElement(BNode(state.getBlank_node_string())); - } - }; - - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::string fixedURI = ""; - state.setElement(URIRef(fixedURI)); - }; - }; - - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); - std::string s; - s = ss.str().substr(2); - - state.setLiteral_string(s); - state.setBlank_node_string(s); - }; - }; - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - //Here parsingIsDone lock is set to true - } - }; - - - template<> - struct action { - template - static void apply(const Input &in, States::BasicState &state) { - std::stringstream ss; - ss << in.string(); 
- std::string s; - s = ss.str().substr(1,ss.str().length()-1); - state.setElement(SparqlQuery::TripleVariable(s)); - ; - } - }; - - - - } -} - -#endif //RDF_PARSER_BASICACTIONS_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Grammer.hpp b/include/Dice/rdf_parser/Parser/Turtle/Grammer.hpp deleted file mode 100644 index 6d202c0..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Grammer.hpp +++ /dev/null @@ -1,519 +0,0 @@ -#ifndef RDF_PARSER_TURTLEPEGTLGRAMMER_HPP -#define RDF_PARSER_TURTLEPEGTLGRAMMER_HPP - -/** -All grammers for turtle are defined here.PEGTL is used for parsing. -For more information about how to define grammers and rules using PEGTl please check "https://github.com/taocpp/PEGTL/blob/master/doc/Rules-and-Grammars.md" -*/ - - -#include - -namespace { - using namespace tao::pegtl; -} - - -namespace rdf_parser::Turtle::Grammer { - -// Terminals - - struct PN_LOCAL_ESC - : seq, - sor, - one<'~'>, - one<'.'>, - one<'-'>, - one<'!'>, - one<'$'>, - one<'&'>, - one<'\''>, - one<'('>, - one<')'>, - one<'*'>, - one<'+'>, - one<','>, - one<';'>, - one<'='>, - one<'/'>, - one<'?'>, - one<'#'>, - one<'@'>, - one<'%'>>> { - }; - - struct HEX : - //ToDo - ranges<'a', 'f', 'A', 'F', '0', '9'> { - }; - - struct PERCENT : - seq, HEX, HEX> { - }; - - struct PLX : - sor { - }; - - struct PN_CHARS_BASE - : sor> { - }; - - struct PN_CHARS_U : - sor> { - }; - - struct PN_CHARS - : sor, - digit, - utf8::one<0x00B7>, - utf8::range<0x0300, 0x036F>, - utf8::range<0x203F, 0x2040>> { - }; - - struct PN_LOCAL - : seq< - sor< - PN_CHARS_U, - one<':'>, - digit, - PLX - >, - star< - star< - one<'.'> - >, - plus< - sor< - PN_CHARS, - one<':'>, - PLX - > - > - > - > { - }; - - struct PN_PREFIX : - seq< - PN_CHARS_BASE, - star< - star>, - PN_CHARS - > - > { - }; - - struct ANON_WS : - one<' ', '\t', '\r', '\n'> { - }; - - struct ANON : - seq, star, one<']'>> { - }; - - struct UCHAR - : sor< - seq, one<'u'>, HEX, HEX, HEX, HEX>, - seq, one<'U'>, HEX, HEX, HEX, HEX, HEX, HEX, 
HEX, HEX> - > { - }; - - struct ECHAR - : seq, sor, one<'b'>, one<'n'>, one<'r'>, one<'f'>, one<'"'>, one<'\''>, one<'\\'>>> { - }; - - struct STRING_LITERAL_LONG_QUOTE : - seq< - one<'\"'>, one<'\"'>, one<'\"'>, - star< - opt< - sor< - seq, one<'\"'>>, - one<'\"'> - > - >, - sor, ECHAR, UCHAR> - >, - one<'\"'>, one<'\"'>, one<'\"'> - > { - }; - - struct STRING_LITERAL_QUOTE - : seq< - one<'\"'>, - star< - sor< - not_one<'\"', '\\', '\r', '\n'>, - ECHAR, - UCHAR - > - - >, - one<'\"'> - > { - }; - - struct STRING_LITERAL_SINGLE_QUOTE - : seq< - one<'\''>, - star< - sor< - not_one<'\'', '\\', '\r', '\n'>, - ECHAR, - UCHAR - > - - >, - one<'\''> - > { - }; - - struct STRING_LITERAL_LONG_SINGLE_QUOTE - : seq< - one<'\''>, one<'\''>, one<'\''>, - star< - opt< - sor< - seq, one<'\''>>, - one<'\''> - > - >, - sor, ECHAR, UCHAR> - >, - one<'\''>, one<'\''>, one<'\''> - > { - }; - - struct INTEGER : - seq, one<'-'>>>, plus> { - }; - - struct DECIMAL : - seq, one<'-'>>>, star, one<'.'>, plus> { - }; - - struct EXPONENT - : seq, one<'E'>>, opt, one<'-'>>>, plus> { - }; - - struct DOUBLE - : seq, one<'-'>>>, sor< - seq, one<'.'>, star, EXPONENT>, - seq, plus, EXPONENT>, - seq, EXPONENT> - > > { - }; - - struct LANGTAG : - seq, plus, star, plus>>> { - }; - - struct BLANK_NODE_LABEL : - seq< - one<'_'>, - one<':'>, - sor< - PN_CHARS_U, - digit - >, - star< - star>, - PN_CHARS - > - > { - }; - - struct PNAME_NS : - seq, one<':'>> { - }; - - struct PNAME_LN : - seq { - }; - - struct IRIREF : - seq< - one<'<'>, - star', '"', '{', '}', '|', '^', '`', '\\'>>, tao::pegtl::range<0x00, 0x20>>, - UCHAR - >>, - one<'>'> - > { - }; - - struct comment : - seq< - one<'#'>, - star>, - one<'\n', '\r'>> { - }; - - - -// Grammar rules - - - - struct ignored : - star> { - }; - - struct NumericLiteral : - sor { - }; - - struct turtleString - : sor { - }; - - struct BlankNode : - sor { - }; - - struct PrefixedName : - sor { - }; - - struct iri : - sor { - }; - - struct prefixID : - seq, ignored, 
PNAME_NS, ignored, IRIREF, ignored, one<'.'>> { - }; - - struct sparqlPrefix : - seq, - one<'R', 'r'>, - one<'E', 'e'>, - one<'F', 'f'>, - one<'I', 'i'>, - one<'X', 'x'>, - ignored, - PNAME_NS, - ignored, - IRIREF> { - }; - - struct base : - seq, ignored, IRIREF, ignored, one<'.'>> { - }; - - struct sparqlBase : - seq, one<'A', 'a'>, one<'S', 's'>, one<'E', 'e'>, ignored, IRIREF> { - }; - - struct directive : - sor { - }; - - struct RdfLiteralTypeTag : - seq, iri> { - }; - - struct RdfLiteral : - seq>> { - }; - - struct BooleanLiteral : - sor, string<'f', 'a', 'l', 's', 'e'>> { - }; - - struct literal : - sor { - }; - - - struct term : - sor { - }; - - //The following grammers are not part of turtle.they are part of sparql language.but used here to make it - //possible to parse some sparql grammer which has very similar context to turtle. - - struct varname : - seq< - sor, - star,utf8::range<0x0300, 0x036F>,utf8::range<0x203F, 0x2040>>> - >{ - }; - - struct var1 : - seq,varname> { - }; - - struct var2 : - seq,varname> { - }; - - struct var : - sor { - }; - - struct varOrTerm: - sor{}; - - ///////////////////////////////////////////////////////////////////////////////////////////////////////////// - - template - struct collection; - template - struct blankNodePropertyList; - template - struct object : - std::conditional_t, blankNodePropertyList>, - sor, blankNodePropertyList> - >{}; - - template - struct objectList : - seq, star, ignored, object>>> { - }; - - struct predicate : - iri { - }; - - struct collectionBegin : - one<'('> { - }; - - template - struct collection : - seq>>, - ignored, - one<')'> - >{}; - - struct verb_a : - one<'a'> { - }; - - template - struct verb : - sor,predicate>, verb_a> { - }; - - template - struct predicateObjectListInner : - seq, ignored, objectList> { - }; - - template - struct predicateObjectList : - seq< - predicateObjectListInner, - star< - seq< - ignored, - one<';'>, - opt< - seq< - ignored, - predicateObjectListInner - > - > - > 
- > - > { - }; - - struct blankNodePropertyListBegin : - one<'['> { - }; - - template - struct blankNodePropertyList - : seq, ignored, one<']'>> { - }; - - template - struct subject : - std::conditional_t> - >{}; - - template - struct tripleSeq1 : - seq< - subject, - ignored, - predicateObjectList - > { - }; - - template - struct tripleSeq2 : - seq< - blankNodePropertyList, - ignored, - opt> - > { - }; - - template - struct triple : - sor< - tripleSeq1, tripleSeq2> { - }; - - - template - struct tripleExtended : - seq< - triple, ignored, one<'.'> - > { - }; - - struct statement : - sor< - tripleExtended, - directive - > { - }; - - - struct statementsCollection : - sor>, seq> { - }; - - - struct turtleDoc : - seq< - plus< - seq - >, ignored - > { - }; - - - struct triplesBlock : - seq< - plus< - seq> - >, ignored - > { - }; - - - template - struct grammer : - std::conditional_t, - sor< - must, - seq - > - > - {}; - - -} - -#endif //RDF_PARSER_TURTLEPEGTLGRAMMER_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Parsers/ConcurrentStreamParser.hpp b/include/Dice/rdf_parser/Parser/Turtle/Parsers/ConcurrentStreamParser.hpp deleted file mode 100644 index 925587a..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Parsers/ConcurrentStreamParser.hpp +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef RDF_PARSER_TURTLEPEGTLCONCURRENTSTREAMPARSER_HPP -#define RDF_PARSER_TURTLEPEGTLCONCURRENTSTREAMPARSER_HPP - -/** - * CuncurrentStreamParser is responsible for parsing triples from stream sources. - * It also creates its own thread for parsing. - * It is also responsible for synchronizing between the parsing thread and the triples queue - * It parse a file as a stream and put the parsed triples increasingly in a tbb::concurrent_bounded_queue - * It is the best choice for very large files or stream sources. 
- */ - -#include -#include -#include -#include - -#include "TriplesParser.hpp" -#include "Dice/rdf_parser/util/scoped_thread.hpp" -#include "Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp" -#include "Dice/rdf_parser/Parser/Turtle/States/ConcurrentState.hpp" - -namespace { - using namespace tao::pegtl; -} - -namespace rdf_parser::Turtle::parsers { - - /* - * - */ - template - class CuncurrentStreamParser : public TriplesParser { - - private: - std::shared_ptr ,boost::lockfree::capacity<100000>>> parsedTerms; - unsigned int upperThrehold; - unsigned int lowerThrehold; - - std::ifstream stream; - std::unique_ptr parsingThread; - std::shared_ptr cv; - std::shared_ptr m; - std::shared_ptr cv2; - std::shared_ptr m2; - std::shared_ptr termCountWithinThreholds; - std::shared_ptr termsCountIsNotEmpty; - std::shared_ptr parsingIsDone; - - public: - - void startParsing(std::string filename, std::size_t bufferSize) { - try { - - States::ConcurrentState>> state(parsedTerms, - upperThrehold, cv, - m, cv2, m2, - termCountWithinThreholds, - termsCountIsNotEmpty, - parsingIsDone); - parse, Actions::action>(istream_input(stream, bufferSize, filename), state); - } - catch (std::exception &e) { - throw e; - } - } - - ~CuncurrentStreamParser() override { - stream.close(); - } - - - CuncurrentStreamParser(std::string filename, std::size_t bufferSize = 1024 * 1024, - unsigned int queueCapacity = 100000) : - stream{filename}, - upperThrehold(queueCapacity), - lowerThrehold(queueCapacity / 10), - parsedTerms{std::make_shared>>()}, - cv{std::make_shared()}, - m{std::make_shared()}, - cv2{std::make_shared()}, - m2{std::make_shared()}, - termCountWithinThreholds{std::make_shared(false)}, - termsCountIsNotEmpty{std::make_shared(false)}, - parsingIsDone{std::make_shared(false)} { - parsingThread = std::make_unique( - std::thread(&CuncurrentStreamParser::startParsing, this, filename, bufferSize)); - - } - - - void nextTriple() override { - parsedTerms->pop(*(this->current_triple)); - if 
(parsedTerms->read_available() < lowerThrehold) { - { - std::lock_guard lk(*m); - *termCountWithinThreholds = true; - } - cv->notify_one(); - } - - } - - bool hasNextTriple() const override { - if (parsedTerms->read_available() != 0) { - return true; - } else { - - //check if the parsing is done - if (*parsingIsDone) { - std::cout << "from main thread : parsingIsDone " << std::endl; - return false; - } else { - std::unique_lock lk(*m2); - *termsCountIsNotEmpty = false; - cv2->wait(lk, [&] { return termsCountIsNotEmpty->load(); }); - - if (parsedTerms->read_available() != 0) - return true; - - else if (*parsingIsDone) { - std::cout << "from main thread : parsingIsDone " << std::endl; - return false; - } else { - throw std::runtime_error(""); - } - - } - - }; - }; - - Iterator begin_implementation(){ - return Iterator(this); - } - }; -} - - -#endif //RDF_PARSER_TURTLEPEGTLCONCURRENTSTREAMPARSER_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Parsers/FileParser.hpp b/include/Dice/rdf_parser/Parser/Turtle/Parsers/FileParser.hpp deleted file mode 100644 index 7c2d9c3..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Parsers/FileParser.hpp +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef RDF_PARSER_TURTLEPEGTLFILEPARSER_HPP -#define RDF_PARSER_TURTLEPEGTLFILEPARSER_HPP - - -/** - * FileParser is responsible for parsing triples from file sources. - * It parse the file one time and put the parsed triples in a std::queue - */ - - -#include -#include - -#include "TriplesParser.hpp" -#include "Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp" - -namespace { - using namespace tao::pegtl; -} - -namespace rdf_parser::Turtle::parsers { - - template - class FileParser : public TriplesParser { - - private: - /** - * a queue for storing parsed triples . - */ - std::shared_ptr>> parsedTerms; - - public: - - - /** - * The constructor start the parsing.if the input is not valid it will throws and exception. - * it also invoke nextTriple to have the first triple ready for using . 
- * @param filename the filename of the file we want to parse - */ - FileParser(std::string filename) { - try { - std::ifstream infile(filename); - read_input file(filename); - States::State<> state(parsedTerms); - parse, Actions::action>(file, state); - - } - catch (std::exception &e) { - throw e; - } - - } - - ~FileParser() override { -// The constructors that take a FILE* argument take ownership of the file pointer, i.e. they fclose() it in the destructor.Therfore, no need to close the file in this destructor -// see https://github.com/taocpp/PEGTL/blob/master/doc/Inputs-and-Parsing.md#file-input - } - - bool hasNextTriple() const override { - return not parsedTerms->empty(); - } - - void nextTriple() override { - this->current_triple = parsedTerms->front(); - parsedTerms->pop(); - - } - - - /** - * checks whether a file is valid rdf turtle file - */ - static bool isParsable(std::string filename) { - try { - std::ifstream infile(filename); - read_input file(filename); - parse>(file); - return true; - } - catch (std::exception &e) { - return false; - } - } - - Iterator begin_implementation(){ - return Iterator(this); - } - - /** - * calculate the time for parsing a rdf turtle file. 
- * Note that the calculated time is only for parsing without using processing the input(creating and storing the triples out of the string) - */ - static long calculateParsingTime(const std::string filename) { - std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); - isParsable(filename); - std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - return duration; - } - - }; -} - - -#endif //RDF_PARSER_TURTLEPEGTLFILEPARSER_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Parsers/StreamParser.hpp b/include/Dice/rdf_parser/Parser/Turtle/Parsers/StreamParser.hpp deleted file mode 100644 index dab4b77..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Parsers/StreamParser.hpp +++ /dev/null @@ -1,101 +0,0 @@ - -#ifndef RDF_PARSER_TURTLEPEGTLSTREAMPARSER_HPP -#define RDF_PARSER_TURTLEPEGTLSTREAMPARSER_HPP - - -/** - * StreamParser is responsible for parsing triples from stream sources. - * It parse a file as a stream and put the parsed triples increasingly in a std::queue - */ - -#include "TriplesParser.hpp" - -#include "Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp" - -namespace { - using namespace tao::pegtl; -} - - -namespace rdf_parser::Turtle::parsers { - - - template - class StreamParser : public TriplesParser { - - private: - /** - * a queue for storing parsed triples . - */ - std::shared_ptr>> parsedTerms; - - /** - * defines the size of the stream buffer - */ - const std::size_t defaultBufferSize = 10000000; - - std::ifstream stream; - - public: - - - /** - * The constructor start the parsing.if the input is not valid it will throws and exception. - * it also invoke nextTriple to have the first triple ready for using . 
- * @param filename the filename of the file we want to parse - */ - StreamParser(std::string filename) : stream{filename} { - try { - read_input file(filename); - parsedTerms = std::make_shared>(); - States::State<> state(parsedTerms); - parse, Actions::action>(istream_input(stream, defaultBufferSize, filename), state); - - } - catch (std::exception &e) { - throw e; - } - - } - - - - ~StreamParser() override { - stream.close(); - } - - StreamParser(std::string filename, std::size_t bufferSize) { - try { - std::ifstream stream(filename); - read_input file(filename); - parsedTerms = std::make_shared>(); - States::State<> state(parsedTerms); - parse, Actions::action>(istream_input(stream, bufferSize, filename), state); - - } - catch (std::exception &e) { - throw e; - } - } - - - void nextTriple() override { - this->current_triple = parsedTerms->front(); - parsedTerms->pop(); - - } - - - bool hasNextTriple() const override { - return not parsedTerms->empty(); - } - - Iterator begin_implementation(){ - return Iterator(this); - } - - }; -} - - -#endif //RDF_PARSER_TURTLEPEGTLSTREAMPARSER_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Parsers/StringParser.hpp b/include/Dice/rdf_parser/Parser/Turtle/Parsers/StringParser.hpp deleted file mode 100644 index 8493065..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Parsers/StringParser.hpp +++ /dev/null @@ -1,130 +0,0 @@ - -#ifndef RDF_PARSER_TURTLEPEGTLSTRINGPARSER_HPP -#define RDF_PARSER_TURTLEPEGTLSTRINGPARSER_HPP - - -/** - * StringParser is responsible for parsing triples from string sources. - * It parse the string one time and put the parsed triples in a std::queue - */ - - - -#include - -#include "TriplesParser.hpp" -#include "Dice/rdf_parser/Parser/Turtle/Actions/Actions.hpp" - -namespace { - using namespace tao::pegtl; -} - - -namespace rdf_parser::Turtle::parsers { - - template - class StringParser : public TriplesParser { - - - private: - /** - * a queue for storing parsed triples . 
- */ - std::shared_ptr>> parsedTerms; - - public: - - - /** - * The constructor start the parsing.if the input is not valid it will throws and exception. - * it also invoke nextTriple to have the first triple ready for using . - * @param text the string to parse - */ - StringParser(std::string text) { - try { - string_input input(text, "the text"); - States::State state(parsedTerms); - parse, Actions::action>(input, state); - - } - catch (std::exception &e) { - throw e; - } - - - } - - /** - * The constructor start the parsing.if the input is not valid it will throws and exception. - * it also invoke nextTriple to have the first triple ready for using . - * @param text the string to parse - * @param prefix_map defines prefixes to be added before parsing - */ - StringParser(std::string text,std::map prefix_map) { - try { - string_input input(text, "the text"); - States::State state(parsedTerms); - for(auto pair : prefix_map) - state.addPrefix(pair.first,pair.second); - parse, Actions::action>(input, state); - - } - catch (std::exception &e) { - throw e; - } - - - } - - bool hasNextTriple() const override { - return not parsedTerms->empty(); - } - - ~StringParser() override { - - } - - void nextTriple() override { - *(this->current_triple) = parsedTerms->front(); - parsedTerms->pop(); - - } - - - /** - * checks whether a string is valid rdf turtle file - */ - static bool isParsable(const std::string &input) { - try { - string_input in(input, "the text"); - parse>(in); - return true; - } - catch (std::exception &e) { - return false; - } - - } - - Iterator begin_implementation(){ - return Iterator(this); - } - - - - /** - * calculate the time for parsing a rdf turtle string. 
- * Note that the calculated time is only for parsing without using processing the input(creating and storing the triples out of the string) - */ - static long calculateParsingTime(const std::string &input) { - std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); - isParsable(input); - std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - return duration; - } - - }; -} - -#endif //RDF_PARSER_TURTLEPEGTLSTRINGPARSER_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/Parsers/TriplesParser.hpp b/include/Dice/rdf_parser/Parser/Turtle/Parsers/TriplesParser.hpp deleted file mode 100644 index f55e115..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/Parsers/TriplesParser.hpp +++ /dev/null @@ -1,109 +0,0 @@ - -#ifndef RDF_PARSER_TRIPLESPARSER_HPP -#define RDF_PARSER_TRIPLESPARSER_HPP - -#include "Dice/rdf_parser/RDF/Triple.hpp" - - -/** - * Base class for parsing triples from different sources. - */ - -namespace { - using namespace rdf_parser::store::rdf; - - -} -namespace rdf_parser::Turtle::parsers { - - template class Derived,bool sparqlQuery> - class Iterator; - - template class Derived,bool sparqlQuery = false> - class TriplesParser { - - protected: - using element_type = std::conditional_t; - - explicit TriplesParser() { - current_triple = std::make_shared(); - }; - std::shared_ptr current_triple; - - - public: - - /** - * process to the next parsed triple. 
- */ - virtual void nextTriple() = 0; - - /** - * check whether there is a further triple - */ - virtual bool hasNextTriple() const = 0; - - /** - * get the current triple - */ - const element_type &getCurrentTriple() { - return *current_triple; - } - - - virtual ~TriplesParser() {}; - - - Iterator begin() - { - return static_cast*>(this)->begin_implementation(); - } - - bool end() { return false; } - - }; - - templateclass Derived,bool sparqlQuery = false> - class Iterator { - - private: - bool done_; - bool parser_done_; - Derived *triplesParser = nullptr; - - public: - explicit Iterator(Derived *triplesParser) : - done_{false}, parser_done_{false}, triplesParser{triplesParser} { - //check if there is at least one parsed triple - if (triplesParser->hasNextTriple()) - this->operator++(); - else - parser_done_ = true; - }; - - - void operator++() { - if (parser_done_) { - done_ = true; - } else { - triplesParser->nextTriple(); - if (not triplesParser->hasNextTriple()) - parser_done_ = true; - } - } - - void operator++(int) { operator++(); } - - operator bool() { return not done_; } - - const std::conditional_t & - operator*() { return triplesParser->getCurrentTriple(); } - }; - - - }; - - - - -#endif //RDF_PARSER_TRIPLESPARSER_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/States/BasicState.hpp b/include/Dice/rdf_parser/Parser/Turtle/States/BasicState.hpp deleted file mode 100644 index ee26945..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/States/BasicState.hpp +++ /dev/null @@ -1,171 +0,0 @@ - -#ifndef RDF_PARSER_BASICSTATE_HPP -#define RDF_PARSER_BASICSTATE_HPP - - -/** -States store information needed during and after the parsing. 
-For more information about states please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md#states - -*/ - - - -#include -#include -#include -#include - -#include "Dice/rdf_parser/RDF/Term.hpp" -#include "Dice/rdf_parser/Sparql/TriplePatternElement.hpp" - - -namespace { - using namespace rdf_parser::store::rdf; -} - - -namespace rdf_parser::Turtle { - namespace States { - - /* - * BasicState defines the data structures realted to rdf term - */ - template - class BasicState { - public: - BasicState() - { - element=std::make_shared>(); - } - protected: - - std::shared_ptr> element; - - //variables to deal with type and lan tags in literals - bool type_tag_found = false; - bool lang_tag_found = false; - bool iri_is_IRIREF; - std::string lan_tag; - std::string type_tag; - std::string literal_string; - std::string Blank_node_string; - - //dealing with base directives - std::string base = ""; - - int latest_BN_label = 1; - - std::map prefix_map; - - - public: - inline std::conditional_t &getElement() { return *element; } - - inline void setElement(std::conditional_t element) { *(this->element) = std::move(element); } - - - inline void addPrefix(std::string prefix, std::string value) { - prefix_map.insert(std::pair(prefix, value)); - } - - inline void setLan_tag(std::string lan_tag) { this->lan_tag = lan_tag; } - - inline void setType_tag(std::string type_tag) { this->type_tag = type_tag; } - - - inline void setType_tag_found(bool found) { - this->type_tag_found = found; - } - - inline void setLang_tag_found(bool found) { - this->lang_tag_found = found; - } - - inline void setIri_is_IRIREF(bool found) { - this->iri_is_IRIREF = found; - } - - void proccessRdfLiteral() { - //check if this RdfLiteral has IRI part - if (type_tag_found == true) { - std::string tag; - //set it again to false - type_tag_found = false; - //check if the type tag is iri or PREFIXED NAME and process it accordingly - if (iriIsIRIREF()) { - tag = type_tag; - } else { - int 
pos = type_tag.find(':'); - std::string prefix = type_tag.substr(0, pos); - tag = type_tag.substr(pos + 1, type_tag.length()); - if (hasPrefix(prefix)) { - std::string mappedPrefix = getPrefixValue(prefix); - tag = mappedPrefix + tag; - } else - tag = type_tag; - } - *element = (Literal(literal_string, std::nullopt, - tag)); - } - //check if this RdfLiteral has langTag part - else if (lang_tag_found == true) { - //set it again to false - lang_tag_found = false; - *element = Literal(literal_string, lan_tag, - std::nullopt); - } else - *element = Literal(literal_string, std::nullopt, - std::nullopt); - } - - std::string getType_tag() { - return type_tag; - } - - bool iriIsIRIREF() { - return iri_is_IRIREF; - } - - std::string getBlank_node_string() { - return Blank_node_string; - } - - inline void setLiteral_string(std::string literal_string) { this->literal_string = literal_string; } - - inline void - setBlank_node_string(std::string Blank_node_string) { this->Blank_node_string = Blank_node_string; } - - inline void setBase(std::string base) { this->base = base; } - - std::string getBase() { return base; } - - // create a unique label for a BlankNode - std::string createBlankNodeLabel() { - //TODO - return "b" + std::to_string(latest_BN_label++); - } - - inline bool hasPrefix(std::string prefix) { - auto found = prefix_map.find(prefix); - if (found != prefix_map.end()) - return true; - return false; - } - - inline std::string getPrefixValue(std::string prefix) { - auto found = prefix_map.find(prefix); - return found->second; - - } - -// std::string processVar(std::string var) -// { -// -// } - }; - } -} - - -#endif //RDF_PARSER_BASICSTATE_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/States/ConcurrentState.hpp b/include/Dice/rdf_parser/Parser/Turtle/States/ConcurrentState.hpp deleted file mode 100644 index 8c11b54..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/States/ConcurrentState.hpp +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef 
RDF_PARSER_CONCURRENTSTATE_HPP -#define RDF_PARSER_CONCURRENTSTATE_HPP - -/** -States store information needed during and after the parsing. -For more information about states please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md#states - -*/ - -#include -#include -#include -#include -//#include -#include - -#include "State.hpp" - -namespace rdf_parser::Turtle { - namespace States { - - /* - * ConcurrentState defines the data structures realted to the whole grammer (stores the parsed triples) - * in a cunncurent data structure (intel concureent queue) - */ - template>> - class ConcurrentState : public State { - - private: - //Defines threhold for triples in the Queue(should be assigned by the constructor) - unsigned int upperThrehold; - std::shared_ptr cv; - std::shared_ptr m; - std::shared_ptr termCountWithinThreholds; - std::shared_ptr termsCountIsNotEmpty; - std::shared_ptr parsingIsDone; - - std::shared_ptr cv2; - std::shared_ptr m2; - - public: - - explicit ConcurrentState(std::shared_ptr &parsingQueue, unsigned int upperThrehold, - std::shared_ptr cv, std::shared_ptr m, - std::shared_ptr cv2, std::shared_ptr m2, - std::shared_ptr termCountWithinThreholds, - std::shared_ptr termsCountIsNotEmpty, - std::shared_ptr parsingIsDone) - : State(parsingQueue) { - this->upperThrehold = upperThrehold; - this->cv = cv; - this->m = m; - this->cv2 = cv2; - this->m2 = m2; - this->termCountWithinThreholds = termCountWithinThreholds; - this->termsCountIsNotEmpty = termsCountIsNotEmpty; - this->parsingIsDone = parsingIsDone; - } - - inline void syncWithMainThread() override { - if (this->parsed_elements->read_available() > upperThrehold) { - std::unique_lock lk(*m); - *termCountWithinThreholds = false; - //set the parsing thread to sleep - cv->wait(lk, [&] { return termCountWithinThreholds->load(); }); - //the parsing thread wake from sleeping - } - } - - inline void insertTriple(std::conditional_t triple) override { - if (*termsCountIsNotEmpty == 
false) { - { - std::lock_guard lk(*m2); - *termsCountIsNotEmpty = true; - } - this->parsed_elements->push(std::move(triple)); - cv2->notify_one(); - } else { - this->parsed_elements->push(std::move(triple)); - } - } - - - void setPasrsingIsDone() override { - if (*termsCountIsNotEmpty == false) { - { - std::lock_guard lk(*m2); - *termsCountIsNotEmpty = true; - } - cv2->notify_one(); - } - *parsingIsDone = true; - - } - - - }; - - } -} -#endif //RDF_PARSER_CONCURRENTSTATE_HPP diff --git a/include/Dice/rdf_parser/Parser/Turtle/States/State.hpp b/include/Dice/rdf_parser/Parser/Turtle/States/State.hpp deleted file mode 100644 index 55ef576..0000000 --- a/include/Dice/rdf_parser/Parser/Turtle/States/State.hpp +++ /dev/null @@ -1,210 +0,0 @@ -#ifndef RDF_PARSER_STATE_HPP -#define RDF_PARSER_STATE_HPP - -/** -States store information needed during and after the parsing. -For more information about states please check https://github.com/taocpp/PEGTL/blob/master/doc/Actions-and-States.md#states - -*/ - -#include -#include - -#include -#include "BasicState.hpp" - - - - -namespace rdf_parser::Turtle { - namespace States { - - /* - * State defines the data structures realted to the whole grammer (stores the parsed triples) - */ - template>> - class State : public BasicState { - - protected: - - using BnplCollectionList=std::vector>; - using VerbObjectPair=std::pair, std::conditional_t>; - - std::shared_ptr parsed_elements; - - - //we use this to slove the case when 2 verbs are pushed into the stack without a pop between - //to solve the PredicateObjectList recursive problem. 
- std::stack> verb_stack; - int verb_stack_one_step_pre_size = 0; - int verb_stack_two_step_pre_size = 0; - - std::conditional_t subject; - - - //deal with multi terms and nesting - BnplCollectionList bnpl_collection_list; - //stack for dealing with collections - std::stack bnpl_collection_list_stack; - - std::vector verb_object_pair_list; - std::stack> verb_object_pair_list_stack; - //to deal with the case when there are BNPL + optional predicateObjectList - std::conditional_t first_BNPL; - - - public: - - explicit State(std::shared_ptr &parsingQueue) { - if (parsingQueue == nullptr) - parsingQueue = std::make_shared(); - parsed_elements = parsingQueue; - } - - virtual inline void syncWithMainThread() { - } - - virtual inline void insertTriple(std::conditional_t triple) { - this->parsed_elements->push(std::move(triple)); - } - - virtual void setPasrsingIsDone() { - } - - - - inline void clearTripleParameters() { -// first_BNPL = nullptr; - verb_stack_two_step_pre_size = 0; - verb_stack_one_step_pre_size = 0; - bnpl_collection_list.clear(); - verb_object_pair_list.clear(); - bnpl_collection_list.clear(); - } - - - inline void setSubject(std::conditional_t subject) { - this->subject = std::move(subject); - } - - - void proccessVerb() { - verb_stack_two_step_pre_size = verb_stack_one_step_pre_size; - verb_stack_one_step_pre_size = verb_stack.size(); - verb_stack.push(this->getElement()); - // - if (verb_stack.size() == verb_stack_two_step_pre_size + 2) { - verb_object_pair_list_stack.push(verb_object_pair_list); - verb_object_pair_list.clear(); - } - } - - void proccessCollection() { - std::vector> LocalParsedTerms; - URIRef first("rdf:first"); - URIRef rest("rdf:rest"); - URIRef nil("rdf:nil"); - if (bnpl_collection_list.size() == 0) { - BNode unlabeledNode(this->createBlankNodeLabel()); - std::conditional_t triple(unlabeledNode, first, nil); - LocalParsedTerms.push_back(triple); - *(this->element)= unlabeledNode; - } else { - bool lastElement = true; - for (auto 
object = bnpl_collection_list.rbegin(); - object != bnpl_collection_list.rend(); object++) { - BNode unlabeledNode(this->createBlankNodeLabel()); - std::conditional_t triple1; - triple1.setSubject(unlabeledNode); - triple1.setPredicate(rest); - - std::conditional_t triple2(unlabeledNode, first, *object); - //case 1 :last element : - if (lastElement) { - lastElement = false; - triple1.setObject(nil); - - } else { - triple1.setObject((LocalParsedTerms[LocalParsedTerms.size() - 1]).subject()); - } - *(this->element) = unlabeledNode; - LocalParsedTerms.push_back(triple1); - LocalParsedTerms.push_back(triple2); - - } - } - for (auto triple:LocalParsedTerms) - insertTriple(triple); - bnpl_collection_list.clear(); - bnpl_collection_list = bnpl_collection_list_stack.top(); - bnpl_collection_list_stack.pop(); - - } - - void proccessBlankNodePropertyList() { - //ToDO : manage the unlabeled nodes names - //create new Blank Node as subject - BNode unlabeledNode(this->createBlankNodeLabel()); - //add the the unlabeledNode to object list - *(this->element) = unlabeledNode; - auto verbobjectPairList = verb_object_pair_list; - //go through all the VerbObject pairs and make triples out of them with the unlabeled subject - for (auto &pair:verbobjectPairList) { - std::conditional_t triple(unlabeledNode, pair.first, pair.second); - insertTriple(triple); - } - verb_object_pair_list.clear(); - - - if (!verb_object_pair_list_stack.empty()) { - verb_object_pair_list = verb_object_pair_list_stack.top(); - verb_object_pair_list_stack.pop(); - } else { - first_BNPL = unlabeledNode; - } - bnpl_collection_list = bnpl_collection_list_stack.top(); - bnpl_collection_list_stack.pop(); - - } - - void proccessPredicateObjectListInner() { - auto verb = verb_stack.top(); - verb_stack.pop(); - for (auto &object:bnpl_collection_list) { - verb_object_pair_list.push_back(std::make_pair<>(verb, object)); - } - bnpl_collection_list.clear(); - } - - - inline void proccessTripleSeq() { - //add the subject to 
each pair in verbObjectsList and create a triple out of that - for (auto &pair:verb_object_pair_list) { - std::conditional_t triple(subject, pair.first, pair.second); - //add the created triple into the store - insertTriple(triple); - } - } - - inline void moveBnpl_collection_listIntoStack() { - bnpl_collection_list_stack.push(bnpl_collection_list); - bnpl_collection_list.clear(); - - } - - inline void pushCurrentTermIntoBnpl_collection_list() { - bnpl_collection_list.push_back(this->getElement()); - } - - - std::conditional_t &getFirst_BNPL() { - return first_BNPL; - } - - - }; - } -} - - -#endif //RDF_PARSER_STATE_HPP diff --git a/include/Dice/rdf_parser/RDF/Term.hpp b/include/Dice/rdf_parser/RDF/Term.hpp deleted file mode 100644 index ca47aff..0000000 --- a/include/Dice/rdf_parser/RDF/Term.hpp +++ /dev/null @@ -1,373 +0,0 @@ -#ifndef RDF_PARSER_TERM_HPP -#define RDF_PARSER_TERM_HPP - - -#include -#include -#include -#include -#include -#include -#include - - -namespace rdf_parser::store::rdf { - class Term; -} - -namespace rdf_parser::Turtle { - - rdf_parser::store::rdf::Term parseTerm(std::string text); -} - - - -namespace rdf_parser::store::rdf { - - /** - * This is a portable a string view. It is not bound to a single string but applicable to any copy of a string. - */ - struct unbound_string_view { - std::ptrdiff_t start = 0; - std::size_t count = 0; - - /** - * Get a string_view for the given string. - * @param str string to be viewed - * @return string_view on the string - */ - [[nodiscard]] std::string_view string_view(const std::string &str) const { - return {str.c_str() + start, count}; - } - }; - - class Literal; - - class BNode; - - class URIRef; - - /** - * An RDF Term. - */ - class Term { - public: - /** - * Type of a Term. 
- */ - enum NodeType { - None = 0, - URIRef_, - BNode_, - Literal_ - - }; - - protected: - std::string identifier_{}; - NodeType node_type_{}; - unbound_string_view value_{}; - // TODO: we may use std::variant for _lang and _data_type - unbound_string_view lang_{}; - unbound_string_view data_type_{}; - - /** - * Is to being used by subtypes URIRef, BNode and Literal. - * @param identifier - * @param node_type - */ - Term(std::string identifier, NodeType node_type) : identifier_(std::move(identifier)), - node_type_(node_type) {} - - public: - explicit Term(std::string identifier) : Term(make_term(std::move(identifier))) {} - - static Term make_term(std::string identifier) { - return rdf_parser::Turtle::parseTerm(identifier); - } - - public: - - - Term() = default; - - Term(Term &) = default; - - Term(const Term &) = default; - - Term(Term &&) = default; - - Term &operator=(const Term &) = default; - - Term &operator=(Term &&) = default; - - - [[nodiscard]] const std::string &getIdentifier() const { - return identifier_; - } - - [[nodiscard]] inline const NodeType &type() const { - return node_type_; - } - - [[nodiscard]] inline bool isLiteral() const { - return node_type_ == NodeType::Literal_; - } - - [[nodiscard]] inline bool isBNode() const { - return node_type_ == NodeType::BNode_; - } - - [[nodiscard]] inline bool isURIRef() const { - return node_type_ == NodeType::URIRef_; - } - - [[nodiscard]] inline Literal &castLiteral(); - - [[nodiscard]] inline BNode &castBNode(); - - [[nodiscard]] inline URIRef &castURIRef(); - - [[nodiscard]] inline const Literal &castLiteral() const; - - [[nodiscard]] inline const BNode &castBNode() const; - - [[nodiscard]] inline const URIRef &castURIRef() const; - - [[nodiscard]] inline std::string_view value() const { - return value_.string_view(identifier_); - } - - inline bool operator==(const Term &rhs) const { - return identifier_ == rhs.identifier_; - } - - inline bool operator!=(const Term &rhs) const { - return identifier_ != 
rhs.identifier_; - } - - inline bool operator<(const Term &rhs) const { - return identifier_ < rhs.identifier_; - } - - inline bool operator>(const Term &rhs) const { - return identifier_ > rhs.identifier_; - } - - friend bool operator==(const Term &lhs, const std::unique_ptr &rhs) { - return lhs == *rhs; - } - - friend bool operator==(const std::unique_ptr &lhs, const Term &rhs) { - return *lhs == rhs; - } - - friend bool operator==(const std::unique_ptr &lhs, const std::unique_ptr &rhs) { - return *lhs == *rhs; - } - - friend bool operator==(const Term *lhs, const std::unique_ptr &rhs) { - return *lhs == *rhs; - } - - friend bool operator==(const std::unique_ptr &lhs, const Term *rhs) { - return *lhs == *rhs; - } - - [[nodiscard]] std::size_t hash() const { - return absl::Hash()(identifier_); - } - }; - - class URIRef : public Term { - - public: - explicit URIRef(const std::string &uri) : Term(fmt::format("<{}>", uri), NodeType::URIRef_) { - this->value_ = {1, uri.size()}; - }; - - [[nodiscard]] inline std::string_view uri() const { - return value(); - } - }; - - class BNode : public Term { - public: - explicit BNode(const std::string &bnode_label) : Term(fmt::format("_:{}", bnode_label), NodeType::BNode_) { - this->value_ = {2, bnode_label.size()}; - }; - - [[nodiscard]] inline std::string_view bnodeLabel() const { - return value(); - } - - }; - - class Literal : public Term { - public: - Literal(const std::string &value, const std::optional &lang, - const std::optional &type) { - node_type_ = NodeType::Literal_; - if (lang) { - this->identifier_ = fmt::format("\"{}\"@{}", value, lang.value()); - this->lang_ = {(std::ptrdiff_t) (1 + value.size() + 1 + 1), lang->size()}; - } else if (type and (type != "http://www.w3.org/2001/XMLSchema#string")) { - // TODO: handle default cases - this->identifier_ = fmt::format("\"{}\"^^<{}>", value, type.value()); - this->data_type_ = {(std::ptrdiff_t) (1 + value.size() + 1 + 2 + 1), type->size()}; - // TODO: manage types types 
- } else { - this->identifier_ = fmt::format("\"{}\"", value); - } - this->value_ = {1, value.size()}; - } - - [[nodiscard]] inline std::string_view dataType() const { - return data_type_.string_view(identifier_); - } - - [[nodiscard]] inline std::string_view lang() const { - return lang_.string_view(identifier_); - } - - [[nodiscard]] inline bool hasDataType() const { - return data_type_.count != 0; - } - - [[nodiscard]] inline bool hasLang() const { - return lang_.count != 0; - } - - }; - - Literal &Term::castLiteral() { - return static_cast(*this); - } - - BNode &Term::castBNode() { - return static_cast(*this); - } - - URIRef &Term::castURIRef() { - return static_cast(*this); - } - - const Literal &Term::castLiteral() const { - return static_cast< const Literal &>(*this); - } - - const BNode &Term::castBNode() const { - return static_cast< const BNode &>(*this); - } - - const URIRef &Term::castURIRef() const { - return static_cast< const URIRef &>(*this); - } - - -}; - - -template<> -struct std::hash { - size_t operator()(const rdf_parser::store::rdf::Term &v) const { - return v.hash(); - } -}; - -template<> -struct std::hash { - size_t operator()(const rdf_parser::store::rdf::Term *&v) const { - return v->hash(); - } -}; - -namespace rdf_parser::store::rdf { - struct TermHash { - size_t operator()(const rdf_parser::store::rdf::Term &v) const { - return v.hash(); - } - - size_t operator()(const std::unique_ptr &v) const { - return v->hash(); - } - - size_t operator()(const rdf_parser::store::rdf::Term *&v) const { - return v->hash(); - } - }; -} - -template<> -struct fmt::formatter { - template - constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } - - template - auto format(const rdf_parser::store::rdf::Term *p, FormatContext &ctx) { - if (p != nullptr) - return format_to(ctx.out(), p->getIdentifier()); - else - return format_to(ctx.out(), ""); - } -}; - -template<> -struct fmt::formatter { - template - constexpr auto parse(ParseContext &ctx) { 
return ctx.begin(); } - - template - auto format(const rdf_parser::store::rdf::Term &p, FormatContext &ctx) { - return format_to(ctx.out(), p.getIdentifier()); - } -}; - - -#include "Dice/rdf_parser/Parser/Turtle/States/BasicState.hpp" -#include -#include "Dice/rdf_parser/Parser/Turtle/Grammer.hpp" -#include "Dice/rdf_parser/Parser/Turtle/Actions/BasicActions.hpp" - - -namespace rdf_parser::Turtle { - - class TermParser { - - - public: - - static Term makeTerm(std::string text) { - try { - using namespace tao::pegtl; - string_input input(text, "the text"); - States::BasicState state; - parse(input, state); - return std::move(state.getElement()); - } - catch (std::exception &e) { - throw e; - } - } - static bool isTermParsable(std::string text) { - try { - using namespace tao::pegtl; - string_input input(text, "the text"); - States::BasicState state; - parse(input, state); - return true; - } - catch (std::exception &e) { - return false; - } - } - - - }; - - Term parseTerm(std::string text){ - return TermParser::makeTerm(std::move(text)); - } -}; - -#endif //RDF_PARSER_TERM_HPP \ No newline at end of file diff --git a/include/Dice/rdf_parser/RDF/Triple.hpp b/include/Dice/rdf_parser/RDF/Triple.hpp deleted file mode 100644 index 16c4fd9..0000000 --- a/include/Dice/rdf_parser/RDF/Triple.hpp +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef RDF_PARSER_TRIPLE_HPP -#define RDF_PARSER_TRIPLE_HPP - -/** - * An RDF triple - */ - - -#include "Dice/rdf_parser/RDF/Term.hpp" - -namespace rdf_parser::store::rdf { - class Triple { - - std::array terms_{}; - - public: - Triple() {} - - Triple(Term subject, Term predicate, Term object) : - terms_{subject, predicate, object} {} - - [[nodiscard]] const Term &subject() const { return terms_[0]; } - - [[nodiscard]] Term &subject() { return terms_[0]; } - - [[nodiscard]] const Term &predicate() const { return terms_[1]; } - - [[nodiscard]] Term &predicate() { return terms_[1]; } - - [[nodiscard]] const Term &object() const { return terms_[2]; } - - 
[[nodiscard]] Term &object() { return terms_[2]; } - - Term &operator[](std::size_t pos) { return terms_[pos]; } - - const Term &operator[](std::size_t pos) const { return terms_[pos]; } - - void setSubject(Term subject) { terms_[0] = std::move(subject); } - - void setPredicate(Term predicate) { terms_[1] = std::move(predicate); } - - void setObject(Term object) { terms_[2] = std::move(object); } - - }; -} - -#endif //RDF_PARSER_TRIPLE_HPP diff --git a/include/Dice/rdf_parser/Sparql/TriplePatternElement.hpp b/include/Dice/rdf_parser/Sparql/TriplePatternElement.hpp deleted file mode 100644 index 4722fef..0000000 --- a/include/Dice/rdf_parser/Sparql/TriplePatternElement.hpp +++ /dev/null @@ -1,39 +0,0 @@ -// -// Created by fakhr on 29.04.20. -// - -#ifndef RDF_PARSER_SPARQL_TRIPLEPATTERNELEMENT_HPP -#define RDF_PARSER_SPARQL_TRIPLEPATTERNELEMENT_HPP - -#include - -#include "TripleVariable.hpp" -#include - -namespace rdf_parser::SparqlQuery { - using VarOrTerm = std::variant; - class TriplePatternElement { - private: - std::array triplePattern; - - public: - - TriplePatternElement() {}; - explicit TriplePatternElement(VarOrTerm element1, VarOrTerm element2, VarOrTerm element3) : triplePattern{ - element1, element2, element3} {}; - - - VarOrTerm subject() { return triplePattern.at(0); }; - - VarOrTerm predicate() { return triplePattern.at(1); }; - - VarOrTerm object() { return triplePattern.at(2); }; - - void setSubject(VarOrTerm subject) { triplePattern.at(0)=subject;}; - - void setPredicate(VarOrTerm predicate) {triplePattern.at(1)=predicate;}; - - void setObject(VarOrTerm object) { triplePattern.at(2)=object;}; - }; -} -#endif //RDF_PARSER_SPARQL_TRIPLEPATTERNELEMENT_HPP diff --git a/include/Dice/rdf_parser/Sparql/TripleVariable.hpp b/include/Dice/rdf_parser/Sparql/TripleVariable.hpp deleted file mode 100644 index 77947b4..0000000 --- a/include/Dice/rdf_parser/Sparql/TripleVariable.hpp +++ /dev/null @@ -1,52 +0,0 @@ - -#ifndef RDF_PARSER_SPARQL_TRIPLEVARIABLE_HPP 
-#define RDF_PARSER_SPARQL_TRIPLEVARIABLE_HPP - -namespace rdf_parser::SparqlQuery { - class TripleVariable { - private: - std::string name; - bool is_anonym; - - public: - - TripleVariable(){}; - explicit TripleVariable(std::string var_name, bool anonym = false) : name{std::move(var_name)}, - is_anonym{anonym} {} - - inline bool operator==(const TripleVariable &rhs) const { - return name == rhs.name; - } - - inline bool operator!=(const TripleVariable &rhs) const { - return name != rhs.name; - } - - inline bool operator<(const TripleVariable &rhs) const { - return name < rhs.name; - } - - inline bool operator>(const TripleVariable &rhs) const { - return name > rhs.name; - } - - inline void setName(std::string name){ - this->name=name; - } - - inline void setIs_anonym(bool is_anonym){ - this->is_anonym=is_anonym; - } - - std::string getName() - { - return name; - } - - bool isAnon(){ - return is_anonym; - } - - }; -} -#endif //RDF_PARSER_SPARQL_TRIPLEVARIABLE_HPP diff --git a/include/Dice/rdf_parser/util/scoped_thread.hpp b/include/Dice/rdf_parser/util/scoped_thread.hpp deleted file mode 100644 index b8468ba..0000000 --- a/include/Dice/rdf_parser/util/scoped_thread.hpp +++ /dev/null @@ -1,28 +0,0 @@ - -#ifndef PEGTL_RDF_PARSER_SCOPED_THREAD_HPP -#define PEGTL_RDF_PARSER_SCOPED_THREAD_HPP - -namespace rdf_parser { - namespace util { - -#include -#include - - class ScopedThread { - std::thread t; - public: - explicit ScopedThread(std::thread t_) : t(std::move(t_)) { - if (!t.joinable()) throw std::logic_error("No thread"); - } - - ~ScopedThread() { - t.join(); - } - - ScopedThread(ScopedThread &) = delete; - - ScopedThread &operator=(ScopedThread const &) = delete; - }; - } -} -#endif //PEGTL_RDF_PARSER_SCOPED_THREAD_HPP diff --git a/include/Dice/sparql-parser/internal/TriplesBlockStringParser.hpp b/include/Dice/sparql-parser/internal/TriplesBlockStringParser.hpp new file mode 100644 index 0000000..b921e09 --- /dev/null +++ 
b/include/Dice/sparql-parser/internal/TriplesBlockStringParser.hpp @@ -0,0 +1,39 @@ +#ifndef RDF_PARSER_TRIPLESBLOCKSTRINGPARSER_HPP +#define RDF_PARSER_TRIPLESBLOCKSTRINGPARSER_HPP + + +#include + +#include "Dice/rdf-parser/internal/Turtle/Parsers/BaseStringParser.hpp" + +/** + * TriplesBlockStringParser is responsible for parsing sparql's tripleBlocks from string sources. + */ + + +namespace Dice::sparql_parser::internal { + + class TriplesBlockStringParser : public ::Dice::rdf_parser::internal::Turtle::Parsers::BaseStringParser { + + + public: + /** + * The constructor start the parsing.if the input is not valid it will throws an exception. + * it also invoke nextTriple to have the first triple ready for using . + * @param text the string to parse + */ + explicit TriplesBlockStringParser(std::string text) : BaseStringParser(std::move(text)) {} + + + /** + * The constructor start the parsing.if the input is not valid it will throws and exception. + * it also invoke nextTriple to have the first triple ready for using . + * @param text the string to parse + * @param prefix_map defines prefixes to be added before parsing. In a robin_hood map. 
+ */ + TriplesBlockStringParser(std::string text, const robin_hood::unordered_map &prefix_map) : BaseStringParser(std::move(text), + prefix_map){}; + }; +}// namespace Dice::rdf_parser::Turtle::parsers + +#endif//RDF_PARSER_TRIPLESBLOCKSTRINGPARSER_HPP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index da18172..13bae72 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,23 +1,54 @@ -cmake_minimum_required(VERSION 3.12) +include(FetchContent) +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.8.1 + GIT_SHALLOW TRUE +) +set(BUILD_GMOCK OFF CACHE BOOL "" FORCE) +set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) +add_library(GTest::GTest ALIAS gtest) +add_library(GTest::Main ALIAS gtest_main) + +include(GoogleTest) +# add the executable for all tests +add_executable(tests Tests.cpp) -set(CMAKE_CXX_STANDARD 17) +target_link_libraries(tests + GTest::GTest + GTest::Main + rdf-parser + ) +set_property(TARGET tests PROPERTY CXX_STANDARD 20) -include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) -conan_basic_setup() +gtest_discover_tests(tests) -FIND_PACKAGE(GTest REQUIRED CONFIG) -find_package(GMock REQUIRED) +# copy files for testing to the binary folder +file(COPY datasets DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + +#download some files for the file parsers + +function(download_file url filename) + if (NOT EXISTS ${filename}) + file(DOWNLOAD ${url} ${filename} + TIMEOUT 60 # seconds + ) + endif () +endfunction(download_file) + +download_file(https://hobbitdata.informatik.uni-leipzig.de/ISWC2020_Tentris/swdf.zip + ${CMAKE_BINARY_DIR}/tests/swdf.zip) + +if (NOT EXISTS ${CMAKE_BINARY_DIR}/tests/swdf.nt) + execute_process( + COMMAND ${CMAKE_COMMAND} -E tar xf ${CMAKE_BINARY_DIR}/tests/swdf.zip + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/tests/datasets/ + ) +endif () + -# add the exectutable for all tests -add_executable(tests Tests.cpp) 
-target_link_libraries(tests - rdf_parser - ${CONAN_LIBS} - ) -set_property(TARGET tests PROPERTY CXX_STANDARD 17) -# copy files for testing to the binary folder -file(COPY datasets DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/tests/SparqlTriplesBlockParsingTests.cpp b/tests/SparqlTriplesBlockParsingTests.cpp new file mode 100644 index 0000000..58ced90 --- /dev/null +++ b/tests/SparqlTriplesBlockParsingTests.cpp @@ -0,0 +1,91 @@ +#include + +#include + +namespace Dice::tests::rdf_parser::sparql_triple_block_parsing_tests { + + using namespace Dice::sparql_parser::internal; + + TEST(SparqlTriplesBlockParsingTests, F1) { + + TriplesBlockStringParser parser("?g ?who . "); + auto it = parser.begin(); + while (it) { + auto x = *it; + it++; + } + } + + TEST(SparqlTriplesBlockParsingTests, AddedprefixTest) { + + robin_hood::unordered_map prefixes; + prefixes.emplace(std::pair("foaf", "http://xmlns.com/foaf/0.1/")); + TriplesBlockStringParser parser("?x foaf:name ?name .", prefixes); + auto it = parser.begin(); + while (it) { + auto x = *it; + it++; + } + } + + TEST(SparqlTriplesBlockParsingTests, AddedprefixTest2) { + + robin_hood::unordered_map prefixes; + //prefixes.insert(std::pair("","http://example.org/book/")); + prefixes.emplace("dc", "http://purl.org/dc/elements/1.1/"); + prefixes.emplace("ns", "http://example.org/ns#"); + TriplesBlockStringParser parser( + "?book dc:title ?title ;\n" + " ns:price ?price .", + prefixes); + auto it = parser.begin(); + while (it) { + auto x = *it; + it++; + } + } + + + TEST(SparqlTriplesBlockParsingTests, tripleBlock) { + + + TriplesBlockStringParser parser( + "?x ?y .\n" + "?x ?nameX . "); + auto it = parser.begin(); + while (it) { + auto x = *it; + it++; + } + } + + TEST(SparqlTriplesBlockParsingTests, tripleBlock2) { + + robin_hood::unordered_map prefixes; + prefixes.emplace("ex", "http://example.org/"); + TriplesBlockStringParser parser("?buch ex:hatVerlag . ?buch ex:titel ?title . ?buch ex:autor ?autor . 
", prefixes); + auto it = parser.begin(); + while (it) { + auto x = *it; + it++; + } + } + + + TEST(SparqlTriplesBlockParsingTests, BlankNodes) { + using VarOrTerm = Dice::sparql::VarOrTerm; + + robin_hood::unordered_map prefixes; + prefixes.emplace("wde", "http://www.wikidata.org/entity/"); + prefixes.emplace("wdt", "http://www.wikidata.org/prop/direct/"); + TriplesBlockStringParser parser("?var1 _:b0 . _:b0 wde:Q202479 ; ?var2 .", prefixes); + auto it = parser.begin(); + while (it) { + auto x = *it; + VarOrTerm object = x.object(); + VarOrTerm predicate = x.predicate(); + VarOrTerm subject = x.subject(); + it++; + } + } +}// namespace Dice::tests::rdf_parser::sparql_triple_block_parsing_tests \ No newline at end of file diff --git a/tests/TermParserTests.cpp b/tests/TermParserTests.cpp deleted file mode 100644 index afebc28..0000000 --- a/tests/TermParserTests.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -namespace { - using namespace rdf_parser::Turtle; - using namespace rdf_parser::store::rdf; -} - - - - -TEST(TermParserTests,parsableLiterals1) { - - bool t1=rdf_parser::Turtle::TermParser::isTermParsable("\" hello \""); - bool t2=rdf_parser::Turtle::TermParser::isTermParsable("\" hello \'hello\' \""); - ASSERT_EQ(t1, true); - ASSERT_EQ(t2, true); -} -// -// -TEST(TermParserTests,unparsableLiterals1) { - - bool t1 = rdf_parser::Turtle::TermParser::isTermParsable("\" hello "); - bool t2 = rdf_parser::Turtle::TermParser::isTermParsable(" hello \""); - ASSERT_EQ(t1, false); - ASSERT_EQ(t2, false); -} diff --git a/tests/TermTests.cpp b/tests/TermTests.cpp index 0435967..92dc4cc 100644 --- a/tests/TermTests.cpp +++ b/tests/TermTests.cpp @@ -1,62 +1,101 @@ +#include +#include #include -#include - -namespace { - using namespace rdf_parser::Turtle; - using namespace rdf_parser::store::rdf; -} - - -TEST(TermTests, parseIRI) { - StringParser<> parser("@prefix pref: . 
" - " a pref:x."); - StringParser<>::Iterator iterator = parser.begin(); - ASSERT_TRUE(bool(iterator)); - const Triple &triple = *iterator; - // check if the non-prefix IRI is correct - ASSERT_EQ(triple.subject().getIdentifier(), ""); - // check if the prefix IRI is correct - ASSERT_EQ(triple.subject().getIdentifier(), ""); -} - -TEST(TermTests, parseStringTerm) { - StringParser<> parser("@prefix xsd: . " - " a \"text\". " - " a \"text\"^^. " - " a \"text\"^^xsd:string. "); - StringParser<>::Iterator iterator = parser.begin(); - ASSERT_TRUE(bool(iterator)); - Triple plain = *iterator; - ++iterator; - Triple full_typed = *iterator; - ++iterator; - Triple prefix_typed = *iterator; - - // check if they are correct - ASSERT_EQ(plain.object().getIdentifier(), "\"text\""); - ASSERT_EQ(full_typed.object().getIdentifier(), "\"text\""); - ASSERT_EQ(prefix_typed.object().getIdentifier(), "\"text\""); -} - - -TEST(TermTests, parseNumbers) { - StringParser<> parser("@prefix : . \n" - " " - ":atomicNumber 2 ;" - " :atomicMass 4.002602 ;" - " :specificGravity 1.663E-4 . 
"); - StringParser<>::Iterator iterator = parser.begin(); - ASSERT_TRUE(bool(iterator)); - Triple integerNumber = *iterator; - ++iterator; - Triple decimalNumber = *iterator; - ++iterator; - Triple doubleNumber = *iterator; - - // check if they are correct - ASSERT_EQ(integerNumber.object().getIdentifier(), "\"2\"^^"); - ASSERT_EQ(decimalNumber.object().getIdentifier(), "\"4.002602\"^^"); - ASSERT_EQ(doubleNumber.object().getIdentifier(), "\"1.663E-4\"^^"); -} +namespace Dice::tests::rdf_parser::term_tests { + using namespace Dice::rdf_parser::internal::Turtle; + using namespace Dice::rdf; + using namespace Dice::rdf_parser::internal::Turtle::Parsers; + using namespace Dice::rdf_parser; + TEST(TermTests, compileHash) { + std::unordered_set x; + std::unordered_set x2; + std::unordered_set y; + std::unordered_set y2; + } + + TEST(TermTests, compileFormattedOutput) { + Term term = parse_term(""); + std::cout << fmt::format("{}", term); + Triple triple{parse_term(""), parse_term(""), parse_term("")}; + std::cout << fmt::format("{}", triple); + + sparql::TriplePattern triplePattern{parse_term(""), sparql::Variable("my_var", false), sparql::Variable("my_var", true)}; + std::cout << fmt::format("{}", triplePattern); + } + + TEST(TermTests, parseSingleIRI) { + Term term = parse_term(""); + ASSERT_EQ(term.type(), Term::NodeType::URIRef_); + } + + TEST(TermTests, TripleEqual) { + Triple triple1{parse_term(""), parse_term(""), parse_term("")}; + Triple triple2{parse_term(""), parse_term(""), parse_term("")}; + Triple triple3{parse_term(""), parse_term(""), parse_term("")}; + ASSERT_EQ(triple1, triple2); + ASSERT_NE(triple1, triple3); + } + + TEST(TermTests, parseTriple) { + Triple triple{parse_term(""), parse_term(""), parse_term("")}; + for (const auto &term : triple) { + ASSERT_EQ(term.type(), Term::NodeType::URIRef_); + } + } + + // TODO: Name of file's and tests' names seem odd. + + TEST(TermTests, parseIRI) { + TurtleStringParser parser("@prefix pref: . 
" + " a pref:x."); + auto iterator = parser.begin(); + ASSERT_TRUE(bool(iterator)); + const Triple &triple = *iterator; + // check if the non-prefix IRI is correct + ASSERT_EQ(triple.subject().getIdentifier(), ""); + // check if the prefix IRI is correct + ASSERT_EQ(triple.subject().getIdentifier(), ""); + } + + TEST(TermTests, parseStringTerm) { + TurtleStringParser parser("@prefix xsd: . " + " a \"text\". " + " a \"text\"^^. " + " a \"text\"^^xsd:string. "); + auto iterator = parser.begin(); + ASSERT_TRUE(bool(iterator)); + Triple plain = *iterator; + ++iterator; + Triple full_typed = *iterator; + ++iterator; + Triple prefix_typed = *iterator; + + // check if they are correct + ASSERT_EQ(plain.object().getIdentifier(), "\"text\""); + ASSERT_EQ(full_typed.object().getIdentifier(), "\"text\""); + ASSERT_EQ(prefix_typed.object().getIdentifier(), "\"text\""); + } + + + TEST(TermTests, parseNumbers) { + TurtleStringParser parser("@prefix : . \n" + " " + ":atomicNumber 2 ;" + " :atomicMass 4.002602 ;" + " :specificGravity 1.663E-4 . 
"); + auto iterator = parser.begin(); + ASSERT_TRUE(bool(iterator)); + Triple integerNumber = *iterator; + ++iterator; + Triple decimalNumber = *iterator; + ++iterator; + Triple doubleNumber = *iterator; + + // check if they are correct + ASSERT_EQ(integerNumber.object().getIdentifier(), "\"2\"^^"); + ASSERT_EQ(decimalNumber.object().getIdentifier(), "\"4.002602\"^^"); + ASSERT_EQ(doubleNumber.object().getIdentifier(), "\"1.663E-4\"^^"); + } +}// namespace Dice::tests::rdf_parser::term_tests diff --git a/tests/Tests.cpp b/tests/Tests.cpp index 6a0eb15..0283359 100644 --- a/tests/Tests.cpp +++ b/tests/Tests.cpp @@ -1,17 +1,15 @@ #include -#include "TurtleOfficalExamplesTest.cpp" -//#include "TurtleOfficialPositiveTests.cpp" -//#include "TurtleOfficialNegativeTests.cpp" -//#include "TurtleOfficialEvaluationTests.cpp" -//#include "TurtleOfficialNegativeEvaluationTests.cpp" -//#include "TurtleParserFilesTests.cpp" -//#include "TurtleParserConcurrentTests.cpp" -//#include "TermParserTests.cpp" -//#include "TermTests.cpp" -#include "TurtlePartialGrammerTests.cpp" +#include "SparqlTriplesBlockParsingTests.cpp" +#include "TermTests.cpp" +#include "TurtleOfficalExamplesTest.cpp" +#include "TurtleOfficialEvaluationTests.cpp" +#include "TurtleOfficialNegativeEvaluationTests.cpp" +#include "TurtleOfficialNegativeTests.cpp" +#include "TurtleOfficialPositiveTests.cpp" +#include "TurtleParserFilesTests.cpp" int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/tests/TurtleOfficalExamplesTest.cpp b/tests/TurtleOfficalExamplesTest.cpp index ccfa15c..4d336cb 100644 --- a/tests/TurtleOfficalExamplesTest.cpp +++ b/tests/TurtleOfficalExamplesTest.cpp @@ -1,519 +1,536 @@ #include -#include - -namespace { -using namespace rdf_parser::Turtle; -} - - -TEST(TurtleOfficalExamplesTests, EXAMPLE1) { - rdf_parser::Turtle::parsers::StringParser parser("@base .\n" - 
"@prefix rdf: .\n" - "@prefix rdfs: .\n" - "@prefix foaf: .\n" - "@prefix rel: .\n" - "\n" - "<#green-goblin>\n" - " rel:enemyOf <#spiderman> ;\n" - " a foaf:Person ; \n" - " foaf:name \"Green Goblin\" .\n" - "\n" - "<#spiderman>\n" - " rel:enemyOf <#green-goblin> ;\n" - " a foaf:Person ;\n" - " foaf:name \"Spiderman\", \"Человек-паук\"@ru ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } - - -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE2) { - - rdf_parser::Turtle::parsers::StringParser parser( - " .\n"); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } - - -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE3) { - - rdf_parser::Turtle::parsers::StringParser parser( - " ;\n" - "\t\t\t\t \"Spiderman\" ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE4) { - - rdf_parser::Turtle::parsers::StringParser parser( - " .\n" - " \"Spiderman\" ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE5) { - - rdf_parser::Turtle::parsers::StringParser parser( - " \"Spiderman\", \"Человек-паук\"@ru ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto 
triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE6) { - - rdf_parser::Turtle::parsers::StringParser parser(" \"Spiderman\" .\n" - " \"Человек-паук\"@ru ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE7) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix somePrefix: .\n" - "\n" - " somePrefix:enemyOf ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE8) { - - rdf_parser::Turtle::parsers::StringParser parser("PREFIX somePrefix: \n" - "\n" - " somePrefix:enemyOf .\n" - "\t\t\t\t "); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE9) { - - rdf_parser::Turtle::parsers::StringParser parser( - " .\n" - "@base .\n" - " . \n" - "BASE \n" - " . \n" - "@prefix p: .\n" - "p:subject3 p:predicate3 p:object3 . \n" - "PREFIX p: \n" - "p:subject3 p:predicate3 p:object3 . \n" - "@prefix p: . \n" - "p:subject4 p:predicate4 p:object4 . \n" - "@prefix : . \n" - ":subject5 :predicate5 :object5 . \n" - ":subject6 a :subject7 . 
\n" - " a :subject8 ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE10) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix foaf: .\n" - "\n" - " foaf:name \"Green Goblin\" .\n" - "\n" - " foaf:name \"Spiderman\" ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE11) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix show: .\n" - "show:218 show:blurb '''This is a multi-line \n" - "literal with many quotes''' ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE12) { - - rdf_parser::Turtle::parsers::StringParser<> parser( - "@prefix : . \n" - " \n" - " :atomicNumber 2 ; \n" - " :atomicMass 4.002602 ; \n" - " :specificGravity 1.663E-4 . "); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE13) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix : .\n" - "\n" - " :isLandlocked false . 
"); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE14) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix foaf: .\n" - "\n" - "_:alice foaf:knows _:bob .\n" - "_:bob foaf:knows _:alice ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE15) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix foaf: .\n" - "\n" - "[] foaf:knows [ foaf:name \"Bob\" ] ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE16) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix foaf: .\n" - "\n" - "[ foaf:name \"Alice\" ] foaf:knows [\n" - " foaf:name \"Bob\" ;\n" - " foaf:knows [\n" - " foaf:name \"Eve\" ] ;\n" - " foaf:mbox ] ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE17) { - - rdf_parser::Turtle::parsers::StringParser parser("_:a \"Alice\" .\n" - "_:a _:b .\n" - "_:b \"Bob\" .\n" - "_:b _:c .\n" - "_:c \"Eve\" .\n" - "_:b ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << 
triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE18) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix : .\n" - ":subject :predicate ( :a :b :c ) .\n" - "\n" - ":subject :predicate2 () ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE19) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix rdf: .\n" - "@prefix dc: .\n" - "@prefix ex: .\n" - "\n" - "\n" - " dc:title \"RDF/XML Syntax Specification (Revised)\" ;\n" - " ex:editor [\n" - " ex:fullname \"Dave Beckett\";\n" - " ex:homePage \n" - " ] ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE20) { - - rdf_parser::Turtle::parsers::StringParser parser( - "PREFIX : \n" - ":a :b ( \"apple\" \"banana\" ) ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE21) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix : .\n" - "@prefix rdf: .\n" - ":a :b\n" - " [ rdf:first \"apple\";\n" - " rdf:rest [ rdf:first \"banana\";\n" - " rdf:rest rdf:nil ]\n" - " ] ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << 
triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE22) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix : .\n" - "\n" - ":a :b \"The first line\\nThe second line\\n more\" .\n" - "\n" - ":a :b \"\"\"The first line\n" - "The second line\n" - " more\"\"\" ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE23) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix : .\n" - "(1 2.0 3E1) :p \"w\" ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE24) { - - rdf_parser::Turtle::parsers::StringParser parser("@prefix rdf: .\n" - "PREFIX : \n" - " _:b0 rdf:first 1 ;\n" - " rdf:rest _:b1 .\n" - " _:b1 rdf:first 2.0 ;\n" - " rdf:rest _:b2 .\n" - " _:b2 rdf:first 3E1 ;\n" - " rdf:rest rdf:nil .\n" - " _:b0 :p \"w\" . 
"); - auto it=parser.begin(); - std::cout << std::endl; - it++; - while(it) { - auto triple= *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - - -TEST(TurtleOfficalExamplesTests, EXAMPLE25) { - - rdf_parser::Turtle::parsers::StringParser parser("PREFIX : \n" - "(1 [:p :q] ( 2 ) ) :p2 :q2 ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} - -TEST(TurtleOfficalExamplesTests, EXAMPLE26) { - - rdf_parser::Turtle::parsers::StringParser<> parser("@prefix rdf: .\n" - "PREFIX : \n" - " _:b0 rdf:first 1 ;\n" - " rdf:rest _:b1 .\n" - " _:b1 rdf:first _:b2 .\n" - " _:b2 :p :q .\n" - " _:b1 rdf:rest _:b3 .\n" - " _:b3 rdf:first _:b4 .\n" - " _:b4 rdf:first 2 ;\n" - " rdf:rest rdf:nil .\n" - " _:b3 rdf:rest rdf:nil ."); - auto it=parser.begin(); - std::cout << std::endl; - while (it) { - auto triple = *it; - std::cout << triple.subject().getIdentifier() << " " - << triple.predicate().getIdentifier() << " " - << triple.object().getIdentifier() - << std::endl; - it++; - } -} +#include + + +namespace Dice::tests::rdf_parser::turtle_official_examples_tests { + + using namespace Dice::rdf_parser; + + + TEST(TurtleOfficalExamplesTests, EXAMPLE1) { + TurtleStringParser parser( + "@base .\n" + "@prefix rdf: .\n" + "@prefix rdfs: .\n" + "@prefix foaf: .\n" + "@prefix rel: .\n" + "\n" + "<#green-goblin>\n" + " rel:enemyOf <#spiderman> ;\n" + " a foaf:Person ; \n" + " foaf:name \"Green Goblin\" .\n" + "\n" + "<#spiderman>\n" + " rel:enemyOf <#green-goblin> ;\n" + " a foaf:Person ;\n" + " foaf:name \"Spiderman\", \"Человек-паук\"@ru ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << 
triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE2) { + + TurtleStringParser parser( + " .\n"); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE3) { + + TurtleStringParser parser( + " ;\n" + "\t\t\t\t \"Spiderman\" ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE4) { + + TurtleStringParser parser( + " .\n" + " \"Spiderman\" ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE5) { + + TurtleStringParser parser( + " \"Spiderman\", \"Человек-паук\"@ru ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE6) { + + TurtleStringParser parser( + " \"Spiderman\" .\n" + " \"Человек-паук\"@ru ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << 
triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE7) { + + TurtleStringParser parser( + "@prefix somePrefix: .\n" + "\n" + " somePrefix:enemyOf ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE8) { + + TurtleStringParser parser( + "PREFIX somePrefix: \n" + "\n" + " somePrefix:enemyOf .\n" + "\t\t\t\t "); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE9) { + + TurtleStringParser parser( + " .\n" + "@base .\n" + " . \n" + "BASE \n" + " . \n" + "@prefix p: .\n" + "p:subject3 p:predicate3 p:object3 . \n" + "PREFIX p: \n" + "p:subject3 p:predicate3 p:object3 . \n" + "@prefix p: . \n" + "p:subject4 p:predicate4 p:object4 . \n" + "@prefix : . \n" + ":subject5 :predicate5 :object5 . \n" + ":subject6 a :subject7 . 
\n" + " a :subject8 ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE10) { + + TurtleStringParser parser( + "@prefix foaf: .\n" + "\n" + " foaf:name \"Green Goblin\" .\n" + "\n" + " foaf:name \"Spiderman\" ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE11) { + + TurtleStringParser parser( + "@prefix show: .\n" + "show:218 show:blurb '''This is a multi-line \n" + "literal with many quotes''' ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE12) { + + TurtleStringParser parser( + "@prefix : . \n" + " \n" + " :atomicNumber 2 ; \n" + " :atomicMass 4.002602 ; \n" + " :specificGravity 1.663E-4 . "); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE13) { + + TurtleStringParser parser( + "@prefix : .\n" + "\n" + " :isLandlocked false . 
"); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE14) { + + TurtleStringParser parser( + "@prefix foaf: .\n" + "\n" + "_:alice foaf:knows _:bob .\n" + "_:bob foaf:knows _:alice ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE15) { + + TurtleStringParser parser( + "@prefix foaf: .\n" + "\n" + "[] foaf:knows [ foaf:name \"Bob\" ] ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE16) { + + TurtleStringParser parser( + "@prefix foaf: .\n" + "\n" + "[ foaf:name \"Alice\" ] foaf:knows [\n" + " foaf:name \"Bob\" ;\n" + " foaf:knows [\n" + " foaf:name \"Eve\" ] ;\n" + " foaf:mbox ] ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE17) { + + TurtleStringParser parser( + "_:a \"Alice\" .\n" + "_:a _:b .\n" + "_:b \"Bob\" .\n" + "_:b _:c .\n" + "_:c \"Eve\" .\n" + "_:b ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() 
<< " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE18) { + + TurtleStringParser parser( + "@prefix : .\n" + ":subject :predicate ( :a :b :c ) .\n" + "\n" + ":subject :predicate2 () ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE19) { + + TurtleStringParser parser( + "@prefix rdf: .\n" + "@prefix dc: .\n" + "@prefix ex: .\n" + "\n" + "\n" + " dc:title \"RDF/XML Syntax Specification (Revised)\" ;\n" + " ex:editor [\n" + " ex:fullname \"Dave Beckett\";\n" + " ex:homePage \n" + " ] ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE20) { + + TurtleStringParser parser( + "PREFIX : \n" + ":a :b ( \"apple\" \"banana\" ) ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE21) { + + TurtleStringParser parser( + "@prefix : .\n" + "@prefix rdf: .\n" + ":a :b\n" + " [ rdf:first \"apple\";\n" + " rdf:rest [ rdf:first \"banana\";\n" + " rdf:rest rdf:nil ]\n" + " ] ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + 
TEST(TurtleOfficalExamplesTests, EXAMPLE22) { + + TurtleStringParser parser( + "@prefix : .\n" + "\n" + ":a :b \"The first line\\nThe second line\\n more\" .\n" + "\n" + ":a :b \"\"\"The first line\n" + "The second line\n" + " more\"\"\" ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE23) { + + TurtleStringParser parser( + "@prefix : .\n" + "(1 2.0 3E1) :p \"w\" ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE24) { + + TurtleStringParser parser( + "@prefix rdf: .\n" + "PREFIX : \n" + " _:b0 rdf:first 1 ;\n" + " rdf:rest _:b1 .\n" + " _:b1 rdf:first 2.0 ;\n" + " rdf:rest _:b2 .\n" + " _:b2 rdf:first 3E1 ;\n" + " rdf:rest rdf:nil .\n" + " _:b0 :p \"w\" . 
"); + auto it = parser.begin(); + std::cout << std::endl; + it++; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + + TEST(TurtleOfficalExamplesTests, EXAMPLE25) { + + TurtleStringParser parser( + "PREFIX : \n" + "(1 [:p :q] ( 2 ) ) :p2 :q2 ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } + + TEST(TurtleOfficalExamplesTests, EXAMPLE26) { + + TurtleStringParser parser( + "@prefix rdf: .\n" + "PREFIX : \n" + " _:b0 rdf:first 1 ;\n" + " rdf:rest _:b1 .\n" + " _:b1 rdf:first _:b2 .\n" + " _:b2 :p :q .\n" + " _:b1 rdf:rest _:b3 .\n" + " _:b3 rdf:first _:b4 .\n" + " _:b4 rdf:first 2 ;\n" + " rdf:rest rdf:nil .\n" + " _:b3 rdf:rest rdf:nil ."); + auto it = parser.begin(); + std::cout << std::endl; + while (it) { + auto triple = *it; + std::cout << triple.subject().getIdentifier() << " " + << triple.predicate().getIdentifier() << " " + << triple.object().getIdentifier() + << std::endl; + it++; + } + } +}// namespace Dice::tests::rdf_parser::turtle_official_examples_tests \ No newline at end of file diff --git a/tests/TurtleOfficialEvaluationTests.cpp b/tests/TurtleOfficialEvaluationTests.cpp index 7cce8e1..8cff83e 100644 --- a/tests/TurtleOfficialEvaluationTests.cpp +++ b/tests/TurtleOfficialEvaluationTests.cpp @@ -1,1090 +1,1004 @@ #include -#include -TEST(TurtleOfficialEvaluationTests, bareword_a_predicate) { - TurtleParser turtleParser(" a ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); +#include + + +namespace Dice::tests::rdf_parser::turtle_official_evaluation_tests { + using namespace Dice::rdf_parser; + + TEST(TurtleOfficialEvaluationTests, bareword_a_predicate) { 
+ TurtleStringParser turtleParser(" a ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, bareword_decimal) { + TurtleStringParser turtleParser(" 1.0 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"1.0\"^^ ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, bareword_double) { + TurtleStringParser turtleParser( + " \"1E0\"^^ ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" 1E0 ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_as_object) { + TurtleStringParser turtleParser( + " [ ] ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" _:b1 .\n" + "_:b1 ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_as_subject) { + TurtleStringParser turtleParser( + "[ ] ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:b1 .\n" + "_:b1 ."); + + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_containing_collection) { + TurtleStringParser turtleParser("[ (1) ] ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:b1 _:el1 .\n" + "_:el1 \"1\"^^ .\n" + "_:el1 ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_with_multiple_triples) { + TurtleStringParser turtleParser( + "[ ; ] ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:b1 .\n" + "_:b1 .\n" + "_:b1 ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, collection_object) { + TurtleStringParser 
turtleParser(" (1) ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" _:el1 .\n" + "_:el1 \"1\"^^ .\n" + "_:el1 ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, collection_subject) { + TurtleStringParser turtleParser("(1) ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + "_:el1 \"1\"^^ .\n" + "_:el1 .\n" + "_:el1 ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, comment_following_PNAME_NS) { + + TurtleStringParser turtleParser("@prefix p: .\n" + " p:#comment\n" + "."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } + + TEST(TurtleOfficialEvaluationTests, double_lower_case_e) { + + TurtleStringParser turtleParser(" 1e0 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"1e0\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, empty_collection) { + + TurtleStringParser turtleParser(" () ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, first) { + + TurtleStringParser turtleParser(" ((1) 2) ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" _:outerEl1 .\n" + "_:outerEl1 _:innerEl1 .\n" + "_:innerEl1 \"1\"^^ .\n" + "_:innerEl1 .\n" + "_:outerEl1 _:outerEl2 .\n" + "_:outerEl2 \"2\"^^ .\n" + "_:outerEl2 ."); + //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); + } + + TEST(TurtleOfficialEvaluationTests, HYPHEN_MINUS_in_localName) { + + TurtleStringParser turtleParser("@prefix p: .\n" + "p:s- ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } + + TEST(TurtleOfficialEvaluationTests, IRI_with_all_punctuation) { + + TurtleStringParser turtleParser( + " ."); + auto it = 
turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, IRIREF_datatype) { + + TurtleStringParser turtleParser( + " \"1\"^^ ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"1\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, labeled_blank_node_object) { + + TurtleStringParser turtleParser(" _:o ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" _:b1 ."); + } + + TEST(TurtleOfficialEvaluationTests, labeled_blank_node_subject) { + + TurtleStringParser turtleParser("_:s ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:b1 ."); + } + + TEST(TurtleOfficialEvaluationTests, langtagged_LONG_with_subtag) { + + TurtleStringParser turtleParser("# Test long literal with lang tag\n" + "@prefix : .\n" + ":a :b \"\"\"Cheers\"\"\"@en-UK ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"Cheers\"@en-UK ."); + } + + TEST(TurtleOfficialEvaluationTests, langtagged_non_LONG) { + + TurtleStringParser turtleParser(" \"chat\"@en ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"chat\"@en ."); + } + + TEST(TurtleOfficialEvaluationTests, lantag_with_subtag) { + + TurtleStringParser turtleParser(" \"chat\"@en-us ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"chat\"@en-us ."); + } + + TEST(TurtleOfficialEvaluationTests, last) { + + TurtleStringParser turtleParser(" (1 (2)) ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" _:outerEl1 .\n" + "_:outerEl1 \"1\"^^ .\n" + "_:outerEl1 _:outerEl2 .\n" + "_:outerEl2 _:innerEl1 .\n" + "_:innerEl1 \"2\"^^ .\n" + "_:innerEl1 .\n" + "_:outerEl2 ."); + } + + TEST(TurtleOfficialEvaluationTests, LITERAL1) { + + TurtleStringParser turtleParser(" 'x' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"x\" ."); + } + + TEST(TurtleOfficialEvaluationTests, 
LITERAL1_all_punctuation) { -} - -TEST(TurtleOfficialEvaluationTests, bareword_decimal) { - TurtleParser turtleParser(" 1.0 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"1.0\"^^ ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, bareword_double) { - TurtleParser turtleParser( - " \"1E0\"^^ ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" 1E0 ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_as_object) { - TurtleParser turtleParser( - " [ ] ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" _:b1 .\n" - "_:b1 ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_as_subject) { - TurtleParser turtleParser( - "[ ] ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:b1 .\n" - "_:b1 ."); - - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_containing_collection) { - TurtleParser turtleParser("[ (1) ] ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:b1 _:el1 .\n" - "_:el1 \"1\"^^ .\n" - "_:el1 ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, blankNodePropertyList_with_multiple_triples) { - TurtleParser turtleParser( - "[ ; ] ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:b1 .\n" - "_:b1 .\n" - "_:b1 ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, collection_object) { - TurtleParser turtleParser(" (1) ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" _:el1 .\n" - "_:el1 \"1\"^^ .\n" - "_:el1 ."); - 
//ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, collection_subject) { - TurtleParser turtleParser("(1) ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - "_:el1 \"1\"^^ .\n" - "_:el1 .\n" - "_:el1 ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, comment_following_PNAME_NS) { - - TurtleParser turtleParser("@prefix p: .\n" - " p:#comment\n" - "."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); - -} - -TEST(TurtleOfficialEvaluationTests, double_lower_case_e) { - - TurtleParser turtleParser(" 1e0 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"1e0\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, empty_collection) { - - TurtleParser turtleParser(" () ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, first) { - - TurtleParser turtleParser(" ((1) 2) ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" _:outerEl1 .\n" - "_:outerEl1 _:innerEl1 .\n" - "_:innerEl1 \"1\"^^ .\n" - "_:innerEl1 .\n" - "_:outerEl1 _:outerEl2 .\n" - "_:outerEl2 \"2\"^^ .\n" - "_:outerEl2 ."); - //ASSERT_TRUE(turtle_state.parsed_terms==nTriples_state.parsed_terms); - -} - -TEST(TurtleOfficialEvaluationTests, HYPHEN_MINUS_in_localName) { - - TurtleParser turtleParser("@prefix p: .\n" - "p:s- ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); - -} - -TEST(TurtleOfficialEvaluationTests, IRI_with_all_punctuation) { - - TurtleParser turtleParser( - " ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, IRIREF_datatype) { - - TurtleParser turtleParser( - " \"1\"^^ ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " 
\"1\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, labeled_blank_node_object) { - - TurtleParser turtleParser(" _:o ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" _:b1 ."); - -} - -TEST(TurtleOfficialEvaluationTests, labeled_blank_node_subject) { - - TurtleParser turtleParser("_:s ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:b1 ."); - -} - -TEST(TurtleOfficialEvaluationTests, langtagged_LONG_with_subtag) { - - TurtleParser turtleParser("# Test long literal with lang tag\n" - "@prefix : .\n" - ":a :b \"\"\"Cheers\"\"\"@en-UK ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"Cheers\"@en-UK ."); - -} - -TEST(TurtleOfficialEvaluationTests, langtagged_non_LONG) { - - TurtleParser turtleParser(" \"chat\"@en ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"chat\"@en ."); - -} - -TEST(TurtleOfficialEvaluationTests, lantag_with_subtag) { - - TurtleParser turtleParser(" \"chat\"@en-us ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"chat\"@en-us ."); - -} - -TEST(TurtleOfficialEvaluationTests, last) { + TurtleStringParser turtleParser( + " ' !\"#$%&():;<=>?@[]^_`{|}~' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \" !\\\"#$%&():;<=>?@[]^_`{|}~\" ."); + } - TurtleParser turtleParser(" (1 (2)) ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" _:outerEl1 .\n" - "_:outerEl1 \"1\"^^ .\n" - "_:outerEl1 _:outerEl2 .\n" - "_:outerEl2 _:innerEl1 .\n" - "_:innerEl1 \"2\"^^ .\n" - "_:innerEl1 .\n" - "_:outerEl2 ."); + TEST(TurtleOfficialEvaluationTests, LITERAL1_ascii_boundaries) { -} - -TEST(TurtleOfficialEvaluationTests, LITERAL1) { - - TurtleParser turtleParser(" 'x' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"x\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, LITERAL1_all_punctuation) { - - TurtleParser turtleParser( - " ' !\"#$%&():;<=>?@[]^_`{|}~' ."); - auto it = 
turtleParser.begin(); - TurtleParser nTriplesParser( - " \" !\\\"#$%&():;<=>?@[]^_`{|}~\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, LITERAL1_ascii_boundaries) { - - TurtleParser turtleParser( - " ' \t\u000B\f\u000E&([]\u007F' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"\\u0000\\t\\u000B\\u000C\\u000E&([]\\u007F\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, literal_false) { - - TurtleParser turtleParser(" false ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"false\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, LITERAL_LONG1_ascii_boundaries) { - - TurtleParser turtleParser(" ' &([]\u007F' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\u0000&([]\\u007F\" ."); + TurtleStringParser turtleParser( + " ' \t\u000B\f\u000E&([]\u007F' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"\\u0000\\t\\u000B\\u000C\\u000E&([]\\u007F\" ."); + } -} + TEST(TurtleOfficialEvaluationTests, literal_false) { -TEST(TurtleOfficialEvaluationTests, LITERAL_LONG1_with_1_squote) { + TurtleStringParser turtleParser(" false ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"false\"^^ ."); + } - TurtleParser turtleParser(" '''x'y''' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"x'y\" ."); + TEST(TurtleOfficialEvaluationTests, LITERAL_LONG1_ascii_boundaries) { -} + TurtleStringParser turtleParser(" ' &([]\u007F' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\u0000&([]\\u007F\" ."); + } -TEST(TurtleOfficialEvaluationTests, LITERAL_LONG1_with_2_squotes) { + TEST(TurtleOfficialEvaluationTests, LITERAL_LONG1_with_1_squote) { - TurtleParser turtleParser(" '''x''y''' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"x''y\" ."); + TurtleStringParser turtleParser(" '''x'y''' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"x'y\" 
."); + } -} + TEST(TurtleOfficialEvaluationTests, LITERAL_LONG1_with_2_squotes) { -TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_ascii_boundaries) { + TurtleStringParser turtleParser(" '''x''y''' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"x''y\" ."); + } - TurtleParser turtleParser(" \" !#[]\u007F\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\u0000!#[]\\u007F\" ."); + TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_ascii_boundaries) { -} + TurtleStringParser turtleParser(" \" !#[]\u007F\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\u0000!#[]\\u007F\" ."); + } -TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_with_1_squote) { + TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_with_1_squote) { - TurtleParser turtleParser(" \"\"\"x\"y\"\"\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"x\\\"y\" ."); + TurtleStringParser turtleParser(" \"\"\"x\"y\"\"\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"x\\\"y\" ."); + } -} + TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_with_2_squotes) { -TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_with_2_squotes) { + TurtleStringParser turtleParser(" \"\"\"x\"\"y\"\"\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"x\\\"\\\"y\" ."); + } - TurtleParser turtleParser(" \"\"\"x\"\"y\"\"\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"x\\\"\\\"y\" ."); + TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_with_REVERSE_SOLIDUS) { -} + TurtleStringParser turtleParser("@prefix : .\n" + "\n" + ":s :p1 \"\"\"test-\\\\\"\"\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"test-\\\\\" ."); + } -TEST(TurtleOfficialEvaluationTests, LITERAL_LONG2_with_REVERSE_SOLIDUS) { + TEST(TurtleOfficialEvaluationTests, literal_true) { - TurtleParser turtleParser("@prefix : .\n" - "\n" - ":s :p1 
\"\"\"test-\\\\\"\"\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"test-\\\\\" ."); + TurtleStringParser turtleParser(" true ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"true\"^^ ."); + } -} + TEST(TurtleOfficialEvaluationTests, literal_with_BACKSPACE) { -TEST(TurtleOfficialEvaluationTests, literal_true) { + TurtleStringParser turtleParser(" '\b' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\u0008\" ."); + } - TurtleParser turtleParser(" true ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"true\"^^ ."); + TEST(TurtleOfficialEvaluationTests, literal_with_CARRIAGE_RETURN) { -} + TurtleStringParser turtleParser(" '''\n" + "''' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\r\" ."); + } -TEST(TurtleOfficialEvaluationTests, literal_with_BACKSPACE) { + TEST(TurtleOfficialEvaluationTests, literal_with_CHARACTER_TABULATION) { - TurtleParser turtleParser(" '\b' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\u0008\" ."); + TurtleStringParser turtleParser(" '\t' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\t\" ."); + } -} + TEST(TurtleOfficialEvaluationTests, literal_with_FORM_FEED) { -TEST(TurtleOfficialEvaluationTests, literal_with_CARRIAGE_RETURN) { + TurtleStringParser turtleParser(" '\f' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\u000C\" ."); + } - TurtleParser turtleParser(" '''\n" - "''' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\r\" ."); + TEST(TurtleOfficialEvaluationTests, literal_with_LINE_FEED) { -} + TurtleStringParser turtleParser(" '''\n" + "''' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\n\" ."); + } -TEST(TurtleOfficialEvaluationTests, literal_with_CHARACTER_TABULATION) { + TEST(TurtleOfficialEvaluationTests, 
literal_with_numeric_escape4) { - TurtleParser turtleParser(" '\t' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\t\" ."); + TurtleStringParser turtleParser(" '\\u006F' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"o\" ."); + } -} - -TEST(TurtleOfficialEvaluationTests, literal_with_FORM_FEED) { - - TurtleParser turtleParser(" '\f' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\u000C\" ."); + TEST(TurtleOfficialEvaluationTests, literal_with_REVERSE_SOLIDUS) { + TurtleStringParser turtleParser(" '\\\\' ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\\\\\" ."); + } -} - -TEST(TurtleOfficialEvaluationTests, literal_with_LINE_FEED) { - - TurtleParser turtleParser(" '''\n" - "''' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\n\" ."); - - -} + TEST(TurtleOfficialEvaluationTests, localName_with_assigned_nfc_bmp_PN_CHARS_BASE_character_boundaries) { -TEST(TurtleOfficialEvaluationTests, literal_with_numeric_escape4) { + TurtleStringParser turtleParser("@prefix p: .\n" + " p:AZazÀÖØöø˿Ͱͽ΄῾\u200C\u200D⁰↉Ⰰ⿕、ퟻ﨎ﷇﷰ\uFFEF ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } - TurtleParser turtleParser(" '\\u006F' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"o\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, literal_with_REVERSE_SOLIDUS) { - - TurtleParser turtleParser(" '\\\\' ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\\\\\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, localName_with_assigned_nfc_bmp_PN_CHARS_BASE_character_boundaries) { - - TurtleParser turtleParser("@prefix p: .\n" - " p:AZazÀÖØöø˿Ͱͽ΄῾\u200C\u200D⁰↉Ⰰ⿕、ퟻ﨎ﷇﷰ\uFFEF ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -/*TEST(TurtleOfficialEvaluationTests, localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries) { + 
/*TEST(TurtleOfficialEvaluationTests, localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries) { - TurtleParser turtleParser("@prefix p: .\n" + RdfStringParser turtleParser("@prefix p: .\n" " p:AZazÀÖØöø˿Ͱͽ΄῾\u200C\u200D⁰↉Ⰰ⿕、ퟻ﨎ﷇﷰ\uFFEF\uD800\uDC00\uDB40\uDDEF ."); auto it=turtleParser.begin(); - TurtleParser nTriplesParser(" ."); + RdfStringParser nTriplesParser(" ."); }*/ -TEST(TurtleOfficialEvaluationTests, localname_with_COLON) { - - TurtleParser turtleParser("@prefix p: .\n" - "p:s: ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); + TEST(TurtleOfficialEvaluationTests, localname_with_COLON) { -} + TurtleStringParser turtleParser("@prefix p: .\n" + "p:s: ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } -TEST(TurtleOfficialEvaluationTests, localName_with_leading_digit) { + TEST(TurtleOfficialEvaluationTests, localName_with_leading_digit) { - TurtleParser turtleParser("@prefix p: .\n" - "p:0 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); - -} + TurtleStringParser turtleParser("@prefix p: .\n" + "p:0 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } -TEST(TurtleOfficialEvaluationTests, localName_with_leading_underscore) { + TEST(TurtleOfficialEvaluationTests, localName_with_leading_underscore) { - TurtleParser turtleParser("@prefix p: .\n" - "p:_ ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); + TurtleStringParser turtleParser("@prefix p: .\n" + "p:_ ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } -} - -/* + /* TEST(TurtleOfficialEvaluationTests, localName_with_nfc_PN_CHARS_BASE_character_boundaries) { - TurtleParser turtleParser("@prefix p: .\n" + RdfStringParser turtleParser("@prefix p: .\n" " p:AZazÀÖØöø˿Ͱͽ\u037F\u1FFF\u200C\u200D⁰\u218FⰀ\u2FEF、\uD7FF﨎\uFDCFﷰ\uFFEF\uD800\uDC00\uDB7F\uDFFD ."); auto it=turtleParser.begin(); - TurtleParser nTriplesParser(" ."); + 
RdfStringParser nTriplesParser(" ."); } */ -TEST(TurtleOfficialEvaluationTests, localName_with_non_leading_extras) { - - TurtleParser turtleParser("@prefix p: .\n" - "p:a·̀ͯ‿.⁀ ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, negative_numeric) { - - TurtleParser turtleParser(" -1 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"-1\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, nested_blankNodePropertyLists) { - - TurtleParser turtleParser( - "[ [ ] ; ]."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:b1 _:b2 .\n" - "_:b2 .\n" - "_:b1 ."); - -} - -TEST(TurtleOfficialEvaluationTests, nested_collection) { - - TurtleParser turtleParser(" ((1)) ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" _:outerEl1 .\n" - "_:outerEl1 _:innerEl1 .\n" - "_:innerEl1 \"1\"^^ .\n" - "_:innerEl1 .\n" - "_:outerEl1 ."); - -} - -TEST(TurtleOfficialEvaluationTests, number_sign_following_localName) { - - TurtleParser turtleParser("@prefix p: .\n" - " p:o\\#numbersign\n" - "."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, number_sign_following_PNAME_NS) { - - TurtleParser turtleParser("@prefix p: .\n" - " p:\\#numbersign\n" - "."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, numeric_with_leading_0) { - - TurtleParser turtleParser(" 01 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"01\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, objectList_with_two_objects) { - - TurtleParser turtleParser( - " , ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, percent_escaped_localName) { - - TurtleParser turtleParser("@prefix p: .\n" - "p:%25 ."); - auto it = turtleParser.begin(); - TurtleParser 
nTriplesParser(" ."); - -} - -TEST(TurtleOfficialEvaluationTests, positive_numeric) { - - TurtleParser turtleParser(" +1 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"+1\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, predicateObjectList_with_two_objectLists) { - - TurtleParser turtleParser( - " ; ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, prefix_reassigned_and_used) { - - TurtleParser turtleParser("@prefix p: .\n" - "@prefix p: .\n" - "p:s ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); - -} - -TEST(TurtleOfficialEvaluationTests, repeated_semis_not_at_end) { - - TurtleParser turtleParser(" ;; ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); - -} - -TEST(TurtleOfficialEvaluationTests, reserved_escaped_localName) { - - TurtleParser turtleParser("@prefix p: .\n" - "p:\\_\\~\\.\\-\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\/\\?\\#\\@\\%00 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_eval_struct_01) { - - TurtleParser turtleParser( - " ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_eval_struct_02) { - - TurtleParser turtleParser(" \n" - " ;\n" - " ; \n" - " ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_01) { - - TurtleParser turtleParser("@prefix : <#> .\n" - "[] :x :y ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - "_:genid1 ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_02) { - - TurtleParser turtleParser("# Test @prefix and qnames\n" - "@prefix : .\n" - "@prefix a: .\n" - "@prefix b: .\n" - ":a :b :c .\n" - "a:a a:b a:c .\n" - ":a a:a b:a ."); - auto it = turtleParser.begin(); - TurtleParser 
nTriplesParser( - " .\n" - " .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_03) { - - TurtleParser turtleParser("# Test , operator\n" - "@prefix : .\n" - ":a :b :c,\n" - " :d,\n" - " :e ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " .\n" - " .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_04) { - - TurtleParser turtleParser("# Test ; operator\n" - "@prefix : .\n" - ":a :b :c ;\n" - " :d :e ;\n" - " :f :g ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " .\n" - " .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_05) { - - TurtleParser turtleParser("# Test empty [] operator; not allowed as predicate\n" - "@prefix : .\n" - "[] :a :b .\n" - ":c :d [] ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:genid1 .\n" - " _:genid2 ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_06) { - - TurtleParser turtleParser("# Test non empty [] operator; not allowed as predicate\n" - "@prefix : .\n" - "[ :a :b ] :c :d .\n" - ":e :f [ :g :h ] ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:genid1 .\n" - "_:genid1 .\n" - "_:genid2 .\n" - " _:genid2 ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_07) { - - TurtleParser turtleParser("# 'a' only allowed as a predicate\n" - "@prefix : .\n" - ":a a :b ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_08) { - - TurtleParser turtleParser("@prefix : .\n" - ":a :b ( \"apple\" \"banana\" ) .\n" - ""); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - "_:genid1 \"banana\" .\n" - "_:genid1 .\n" - "_:genid2 \"apple\" .\n" - "_:genid2 _:genid1 .\n" - " _:genid2 ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_09) { - - TurtleParser turtleParser("@prefix : .\n" - ":a :b ( ) .\n" - ""); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " 
."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_10) { - - TurtleParser turtleParser("# Test integer datatyped literals using an OWL cardinality constraint\n" - "@prefix owl: .\n" - "\n" - "# based on examples in the OWL Reference\n" - "\n" - "_:hasParent a owl:ObjectProperty .\n" - "\n" - "[] a owl:Restriction ;\n" - " owl:onProperty _:hasParent ;\n" - " owl:maxCardinality 2 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - "_:hasParent .\n" - "_:genid1 .\n" - "_:genid1 _:hasParent .\n" - "_:genid1 \"2\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_11) { - - TurtleParser turtleParser(" 000000 .\n" - " 0 .\n" - " 000001 .\n" - " 2 .\n" - " 4 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"000000\"^^ .\n" - " \"0\"^^ .\n" - " \"000001\"^^ .\n" - " \"2\"^^ .\n" - " \"4\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_12) { - - TurtleParser turtleParser("# Tests for - and _ in names, qnames\n" - "@prefix ex1: .\n" - "@prefix ex-2: .\n" - "@prefix ex3_: .\n" - "@prefix ex4-: .\n" - "\n" - "ex1:foo-bar ex1:foo_bar \"a\" .\n" - "ex-2:foo-bar ex-2:foo_bar \"b\" .\n" - "ex3_:foo-bar ex3_:foo_bar \"c\" .\n" - "ex4-:foo-bar ex4-:foo_bar \"d\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"a\" .\n" - " \"b\" .\n" - " \"c\" .\n" - " \"d\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_13) { - - TurtleParser turtleParser("# Tests for rdf:_ and other qnames starting with _\n" - "@prefix rdf: .\n" - "@prefix ex: .\n" - "@prefix : .\n" - "\n" - "ex:foo rdf:_1 \"1\" .\n" - "ex:foo rdf:_2 \"2\" .\n" - "ex:foo :_abc \"def\" .\n" - "ex:foo :_345 \"678\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"1\" .\n" - " \"2\" .\n" - " \"def\" .\n" - " \"678\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_14) { - - TurtleParser turtleParser("# Test for : allowed\n" - "@prefix : .\n" - "\n" - "[] : [] .\n" - "\n" - 
": : : .\n" - ""); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser("_:genid1 _:genid2 .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_15) { - - TurtleParser turtleParser("# Test long literal\n" - "@prefix : .\n" - ":a :b \"\"\"a long\n" - "\tliteral\n" - "with\n" - "newlines\"\"\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"a long\\n\\tliteral\\nwith\\nnewlines\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_16) { - - TurtleParser turtleParser("@prefix : .\n" - "\n" - "## \\U00015678 is a not a legal codepoint\n" - "## :a :b \"\"\"\\nthis \\ris a \\U00015678long\\t\n" - "## literal\\uABCD\n" - "## \"\"\" .\n" - "## \n" - "## :d :e \"\"\"\\tThis \\uABCDis\\r \\U00015678another\\n\n" - "## one\n" - "## \"\"\" .\n" - "\n" - "# \\U00015678 is a not a legal codepoint\n" - "# \\U00012451 in Cuneiform numeric ban 3\n" - ":a :b \"\"\"\\nthis \\ris a \\U00012451long\\t\n" - "literal\\uABCD\n" - "\"\"\" .\n" - "\n" - ":d :e \"\"\"\\tThis \\uABCDis\\r \\U00012451another\\n\n" - "one\n" - "\"\"\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"\\nthis \\ris a \\U00012451long\\t\\nliteral\\uABCD\\n\" .\n" - " \"\\tThis \\uABCDis\\r \\U00012451another\\n\\none\\n\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_17) { - - TurtleParser turtleParser("@prefix : .\n" - "\n" - ":a :b 1.0 .\n" - ""); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"1.0\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_18) { - - TurtleParser turtleParser("@prefix : .\n" - "\n" - ":a :b \"\" .\n" - "\n" - ":c :d \"\"\"\"\"\" .\n" - ""); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" \"\" .\n" - " \"\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_19) { - - TurtleParser turtleParser("@prefix : .\n" - ":a :b 1.0 .\n" - ":c :d 1 .\n" - ":e :f 1.0e0 ."); - auto it = turtleParser.begin(); - TurtleParser 
nTriplesParser( - " \"1.0\"^^ .\n" - " \"1\"^^ .\n" - " \"1.0e0\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_20) { - - TurtleParser turtleParser("@prefix : .\n" - ":a :b -1.0 .\n" - ":c :d -1 .\n" - ":e :f -1.0e0 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"-1.0\"^^ .\n" - " \"-1\"^^ .\n" - " \"-1.0e0\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_21) { - - TurtleParser turtleParser("# Test long literal\n" - "@prefix : .\n" - ":a :b \"\"\"John said: \"Hello World!\\\"\"\"\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"John said: \\\"Hello World!\\\"\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_22) { - - TurtleParser turtleParser("@prefix : .\n" - ":a :b true .\n" - ":c :d false ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"true\"^^ .\n" - " \"false\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, DISABLED_turtle_subm_23) { - - TurtleParser turtleParser("# comment test\n" - "@prefix : .\n" - ":a :b :c . # end of line comment\n" - ":d # ignore me\n" - " :e # and me\n" - " :f # and me\n" - " .\n" - ":g :h #ignore me\n" - " :i, # and me\n" - " :j . # and me\n" - "\n" - ":k :l :m ; #ignore me\n" - " :n :o ; # and me\n" - " :p :q . 
# and me"); - bool a = turtleParser.isContentParsable(); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " .\n" - " .\n" - " .\n" - " .\n" - " .\n" - " .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, DISABLED_turtle_subm_24) { - - TurtleParser turtleParser("# comment line with no final newline test\n" - "@prefix : .\n" - ":a :b :c .\n" - "#foo"); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_25) { - - TurtleParser turtleParser("@prefix foo: .\n" - "@prefix foo: .\n" - "\n" - "foo:blah foo:blah foo:blah .\n" - ""); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_26) { - - TurtleParser turtleParser( - " \"2.345\"^^ .\n" - " \"1\"^^ .\n" - " \"1.0\"^^ .\n" - " \"1.\"^^ .\n" - " \"1.000000000\"^^ .\n" - " \"2.3\"^^ .\n" - " \"2.234000005\"^^ .\n" - " \"2.2340000005\"^^ .\n" - " \"2.23400000005\"^^ .\n" - " \"2.234000000005\"^^ .\n" - " \"2.2340000000005\"^^ .\n" - " \"2.23400000000005\"^^ .\n" - " \"2.234000000000005\"^^ .\n" - " \"2.2340000000000005\"^^ .\n" - " \"2.23400000000000005\"^^ .\n" - " \"2.234000000000000005\"^^ .\n" - " \"2.2340000000000000005\"^^ .\n" - " \"2.23400000000000000005\"^^ .\n" - " \"2.234000000000000000005\"^^ .\n" - " \"2.2340000000000000000005\"^^ .\n" - " \"2.23400000000000000000005\"^^ .\n" - " \"1.2345678901234567890123457890\"^^ ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"2.345\"^^ .\n" - " \"1\"^^ .\n" - " \"1.0\"^^ .\n" - " \"1.\"^^ .\n" - " \"1.000000000\"^^ .\n" - " \"2.3\"^^ .\n" - " \"2.234000005\"^^ .\n" - " \"2.2340000005\"^^ .\n" - " \"2.23400000005\"^^ .\n" - " \"2.234000000005\"^^ .\n" - " \"2.2340000000005\"^^ .\n" - " \"2.23400000000005\"^^ .\n" - " \"2.234000000000005\"^^ .\n" - " \"2.2340000000000005\"^^ .\n" - " \"2.23400000000000005\"^^ .\n" - " \"2.234000000000000005\"^^ .\n" - " 
\"2.2340000000000000005\"^^ .\n" - " \"2.23400000000000000005\"^^ .\n" - " \"2.234000000000000000005\"^^ .\n" - " \"2.2340000000000000000005\"^^ .\n" - " \"2.23400000000000000000005\"^^ .\n" - " \"1.2345678901234567890123457890\"^^ ."); - -} - -TEST(TurtleOfficialEvaluationTests, turtle_subm_27) { - - TurtleParser turtleParser( - "# In-scope base URI is at this point\n" - " .\n" - "@base .\n" - "# In-scope base URI is http://example.org/ns/ at this point\n" - " .\n" - "@base .\n" - "# In-scope base URI is http://example.org/ns/foo/ at this point\n" - " .\n" - "@prefix : .\n" - ":a4 :b4 :c4 .\n" - "@prefix : .\n" - ":a5 :b5 :c5 ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " .\n" - " .\n" - " .\n" - " .\n" - " ."); - -} - -TEST(TurtleOfficialEvaluationTests, two_LITERAL_LONG2s) { - - TurtleParser turtleParser("# Test long literal twice to ensure it does not over-quote\n" - "@prefix : .\n" - ":a :b \"\"\"first long literal\"\"\" .\n" - ":c :d \"\"\"second long literal\"\"\" ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser( - " \"first long literal\" .\n" - " \"second long literal\" ."); - -} - -TEST(TurtleOfficialEvaluationTests, underscore_in_localName) { - - TurtleParser turtleParser("@prefix p: .\n" - "p:s_ ."); - auto it = turtleParser.begin(); - TurtleParser nTriplesParser(" ."); - -} + TEST(TurtleOfficialEvaluationTests, localName_with_non_leading_extras) { + + TurtleStringParser turtleParser("@prefix p: .\n" + "p:a·̀ͯ‿.⁀ ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, negative_numeric) { + + TurtleStringParser turtleParser(" -1 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"-1\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, nested_blankNodePropertyLists) { + + TurtleStringParser turtleParser( + "[ [ ] ; ]."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:b1 _:b2 .\n" + 
"_:b2 .\n" + "_:b1 ."); + } + + TEST(TurtleOfficialEvaluationTests, nested_collection) { + + TurtleStringParser turtleParser(" ((1)) ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" _:outerEl1 .\n" + "_:outerEl1 _:innerEl1 .\n" + "_:innerEl1 \"1\"^^ .\n" + "_:innerEl1 .\n" + "_:outerEl1 ."); + } + + TEST(TurtleOfficialEvaluationTests, number_sign_following_localName) { + + TurtleStringParser turtleParser("@prefix p: .\n" + " p:o\\#numbersign\n" + "."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, number_sign_following_PNAME_NS) { + + TurtleStringParser turtleParser("@prefix p: .\n" + " p:\\#numbersign\n" + "."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, numeric_with_leading_0) { + + TurtleStringParser turtleParser(" 01 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"01\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, objectList_with_two_objects) { + + TurtleStringParser turtleParser( + " , ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, percent_escaped_localName) { + + TurtleStringParser turtleParser("@prefix p: .\n" + "p:%25 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } + + TEST(TurtleOfficialEvaluationTests, positive_numeric) { + + TurtleStringParser turtleParser(" +1 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"+1\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, predicateObjectList_with_two_objectLists) { + + TurtleStringParser turtleParser( + " ; ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, prefix_reassigned_and_used) { + + TurtleStringParser turtleParser("@prefix p: .\n" + "@prefix p: 
.\n" + "p:s ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } + + TEST(TurtleOfficialEvaluationTests, repeated_semis_not_at_end) { + + TurtleStringParser turtleParser(" ;; ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } + + TEST(TurtleOfficialEvaluationTests, reserved_escaped_localName) { + + TurtleStringParser turtleParser("@prefix p: .\n" + "p:\\_\\~\\.\\-\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\/\\?\\#\\@\\%00 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_eval_struct_01) { + + TurtleStringParser turtleParser( + " ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_eval_struct_02) { + + TurtleStringParser turtleParser(" \n" + " ;\n" + " ; \n" + " ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_01) { + + TurtleStringParser turtleParser("@prefix : <#> .\n" + "[] :x :y ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + "_:genid1 ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_02) { + + TurtleStringParser turtleParser("# Test @prefix and qnames\n" + "@prefix : .\n" + "@prefix a: .\n" + "@prefix b: .\n" + ":a :b :c .\n" + "a:a a:b a:c .\n" + ":a a:a b:a ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " .\n" + " .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_03) { + + TurtleStringParser turtleParser("# Test , operator\n" + "@prefix : .\n" + ":a :b :c,\n" + " :d,\n" + " :e ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " .\n" + " .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_04) { + + TurtleStringParser turtleParser("# Test ; operator\n" + "@prefix : .\n" + ":a :b :c ;\n" + " :d :e ;\n" 
+ " :f :g ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " .\n" + " .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_05) { + + TurtleStringParser turtleParser("# Test empty [] operator; not allowed as predicate\n" + "@prefix : .\n" + "[] :a :b .\n" + ":c :d [] ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:genid1 .\n" + " _:genid2 ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_06) { + + TurtleStringParser turtleParser("# Test non empty [] operator; not allowed as predicate\n" + "@prefix : .\n" + "[ :a :b ] :c :d .\n" + ":e :f [ :g :h ] ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:genid1 .\n" + "_:genid1 .\n" + "_:genid2 .\n" + " _:genid2 ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_07) { + + TurtleStringParser turtleParser("# 'a' only allowed as a predicate\n" + "@prefix : .\n" + ":a a :b ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_08) { + + TurtleStringParser turtleParser("@prefix : .\n" + ":a :b ( \"apple\" \"banana\" ) .\n" + ""); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + "_:genid1 \"banana\" .\n" + "_:genid1 .\n" + "_:genid2 \"apple\" .\n" + "_:genid2 _:genid1 .\n" + " _:genid2 ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_09) { + + TurtleStringParser turtleParser("@prefix : .\n" + ":a :b ( ) .\n" + ""); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_10) { + + TurtleStringParser turtleParser("# Test integer datatyped literals using an OWL cardinality constraint\n" + "@prefix owl: .\n" + "\n" + "# based on examples in the OWL Reference\n" + "\n" + "_:hasParent a owl:ObjectProperty .\n" + "\n" + "[] a owl:Restriction ;\n" + " owl:onProperty _:hasParent ;\n" + " owl:maxCardinality 2 ."); + 
auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + "_:hasParent .\n" + "_:genid1 .\n" + "_:genid1 _:hasParent .\n" + "_:genid1 \"2\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_11) { + + TurtleStringParser turtleParser(" 000000 .\n" + " 0 .\n" + " 000001 .\n" + " 2 .\n" + " 4 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"000000\"^^ .\n" + " \"0\"^^ .\n" + " \"000001\"^^ .\n" + " \"2\"^^ .\n" + " \"4\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_12) { + + TurtleStringParser turtleParser("# Tests for - and _ in names, qnames\n" + "@prefix ex1: .\n" + "@prefix ex-2: .\n" + "@prefix ex3_: .\n" + "@prefix ex4-: .\n" + "\n" + "ex1:foo-bar ex1:foo_bar \"a\" .\n" + "ex-2:foo-bar ex-2:foo_bar \"b\" .\n" + "ex3_:foo-bar ex3_:foo_bar \"c\" .\n" + "ex4-:foo-bar ex4-:foo_bar \"d\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"a\" .\n" + " \"b\" .\n" + " \"c\" .\n" + " \"d\" ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_13) { + + TurtleStringParser turtleParser("# Tests for rdf:_ and other qnames starting with _\n" + "@prefix rdf: .\n" + "@prefix ex: .\n" + "@prefix : .\n" + "\n" + "ex:foo rdf:_1 \"1\" .\n" + "ex:foo rdf:_2 \"2\" .\n" + "ex:foo :_abc \"def\" .\n" + "ex:foo :_345 \"678\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"1\" .\n" + " \"2\" .\n" + " \"def\" .\n" + " \"678\" ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_14) { + + TurtleStringParser turtleParser("# Test for : allowed\n" + "@prefix : .\n" + "\n" + "[] : [] .\n" + "\n" + ": : : .\n" + ""); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser("_:genid1 _:genid2 .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_15) { + + TurtleStringParser turtleParser("# Test long literal\n" + "@prefix : .\n" + ":a :b \"\"\"a long\n" + "\tliteral\n" + "with\n" + "newlines\"\"\" ."); + auto it = 
turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"a long\\n\\tliteral\\nwith\\nnewlines\" ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_16) { + + TurtleStringParser turtleParser("@prefix : .\n" + "\n" + "## \\U00015678 is a not a legal codepoint\n" + "## :a :b \"\"\"\\nthis \\ris a \\U00015678long\\t\n" + "## literal\\uABCD\n" + "## \"\"\" .\n" + "## \n" + "## :d :e \"\"\"\\tThis \\uABCDis\\r \\U00015678another\\n\n" + "## one\n" + "## \"\"\" .\n" + "\n" + "# \\U00015678 is a not a legal codepoint\n" + "# \\U00012451 in Cuneiform numeric ban 3\n" + ":a :b \"\"\"\\nthis \\ris a \\U00012451long\\t\n" + "literal\\uABCD\n" + "\"\"\" .\n" + "\n" + ":d :e \"\"\"\\tThis \\uABCDis\\r \\U00012451another\\n\n" + "one\n" + "\"\"\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"\\nthis \\ris a \\U00012451long\\t\\nliteral\\uABCD\\n\" .\n" + " \"\\tThis \\uABCDis\\r \\U00012451another\\n\\none\\n\" ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_17) { + + TurtleStringParser turtleParser("@prefix : .\n" + "\n" + ":a :b 1.0 .\n" + ""); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"1.0\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_18) { + + TurtleStringParser turtleParser("@prefix : .\n" + "\n" + ":a :b \"\" .\n" + "\n" + ":c :d \"\"\"\"\"\" .\n" + ""); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" \"\" .\n" + " \"\" ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_19) { + + TurtleStringParser turtleParser("@prefix : .\n" + ":a :b 1.0 .\n" + ":c :d 1 .\n" + ":e :f 1.0e0 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"1.0\"^^ .\n" + " \"1\"^^ .\n" + " \"1.0e0\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_20) { + + TurtleStringParser turtleParser("@prefix : .\n" + ":a :b -1.0 .\n" + ":c :d -1 .\n" + ":e :f -1.0e0 ."); + auto it = turtleParser.begin(); + 
TurtleStringParser nTriplesParser( + " \"-1.0\"^^ .\n" + " \"-1\"^^ .\n" + " \"-1.0e0\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_21) { + + TurtleStringParser turtleParser("# Test long literal\n" + "@prefix : .\n" + ":a :b \"\"\"John said: \"Hello World!\\\"\"\"\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"John said: \\\"Hello World!\\\"\" ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_22) { + + TurtleStringParser turtleParser("@prefix : .\n" + ":a :b true .\n" + ":c :d false ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"true\"^^ .\n" + " \"false\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, DISABLED_turtle_subm_23) { + + TurtleStringParser turtleParser("# comment test\n" + "@prefix : .\n" + ":a :b :c . # end of line comment\n" + ":d # ignore me\n" + " :e # and me\n" + " :f # and me\n" + " .\n" + ":g :h #ignore me\n" + " :i, # and me\n" + " :j . # and me\n" + "\n" + ":k :l :m ; #ignore me\n" + " :n :o ; # and me\n" + " :p :q . 
# and me"); + //bool a = turtleParser.isContentParsable(); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " .\n" + " .\n" + " .\n" + " .\n" + " .\n" + " .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, DISABLED_turtle_subm_24) { + + TurtleStringParser turtleParser("# comment line with no final newline test\n" + "@prefix : .\n" + ":a :b :c .\n" + "#foo"); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_25) { + + TurtleStringParser turtleParser("@prefix foo: .\n" + "@prefix foo: .\n" + "\n" + "foo:blah foo:blah foo:blah .\n" + ""); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_26) { + + TurtleStringParser turtleParser( + " \"2.345\"^^ .\n" + " \"1\"^^ .\n" + " \"1.0\"^^ .\n" + " \"1.\"^^ .\n" + " \"1.000000000\"^^ .\n" + " \"2.3\"^^ .\n" + " \"2.234000005\"^^ .\n" + " \"2.2340000005\"^^ .\n" + " \"2.23400000005\"^^ .\n" + " \"2.234000000005\"^^ .\n" + " \"2.2340000000005\"^^ .\n" + " \"2.23400000000005\"^^ .\n" + " \"2.234000000000005\"^^ .\n" + " \"2.2340000000000005\"^^ .\n" + " \"2.23400000000000005\"^^ .\n" + " \"2.234000000000000005\"^^ .\n" + " \"2.2340000000000000005\"^^ .\n" + " \"2.23400000000000000005\"^^ .\n" + " \"2.234000000000000000005\"^^ .\n" + " \"2.2340000000000000000005\"^^ .\n" + " \"2.23400000000000000000005\"^^ .\n" + " \"1.2345678901234567890123457890\"^^ ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"2.345\"^^ .\n" + " \"1\"^^ .\n" + " \"1.0\"^^ .\n" + " \"1.\"^^ .\n" + " \"1.000000000\"^^ .\n" + " \"2.3\"^^ .\n" + " \"2.234000005\"^^ .\n" + " \"2.2340000005\"^^ .\n" + " \"2.23400000005\"^^ .\n" + " \"2.234000000005\"^^ .\n" + " \"2.2340000000005\"^^ .\n" + " \"2.23400000000005\"^^ .\n" + " \"2.234000000000005\"^^ .\n" + " \"2.2340000000000005\"^^ .\n" + " \"2.23400000000000005\"^^ .\n" + " 
\"2.234000000000000005\"^^ .\n" + " \"2.2340000000000000005\"^^ .\n" + " \"2.23400000000000000005\"^^ .\n" + " \"2.234000000000000000005\"^^ .\n" + " \"2.2340000000000000000005\"^^ .\n" + " \"2.23400000000000000000005\"^^ .\n" + " \"1.2345678901234567890123457890\"^^ ."); + } + + TEST(TurtleOfficialEvaluationTests, turtle_subm_27) { + + TurtleStringParser turtleParser( + "# In-scope base URI is at this point\n" + " .\n" + "@base .\n" + "# In-scope base URI is http://example.org/ns/ at this point\n" + " .\n" + "@base .\n" + "# In-scope base URI is http://example.org/ns/foo/ at this point\n" + " .\n" + "@prefix : .\n" + ":a4 :b4 :c4 .\n" + "@prefix : .\n" + ":a5 :b5 :c5 ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " .\n" + " .\n" + " .\n" + " .\n" + " ."); + } + + TEST(TurtleOfficialEvaluationTests, two_LITERAL_LONG2s) { + + TurtleStringParser turtleParser("# Test long literal twice to ensure it does not over-quote\n" + "@prefix : .\n" + ":a :b \"\"\"first long literal\"\"\" .\n" + ":c :d \"\"\"second long literal\"\"\" ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser( + " \"first long literal\" .\n" + " \"second long literal\" ."); + } + + TEST(TurtleOfficialEvaluationTests, underscore_in_localName) { + + TurtleStringParser turtleParser("@prefix p: .\n" + "p:s_ ."); + auto it = turtleParser.begin(); + TurtleStringParser nTriplesParser(" ."); + } +}// namespace Dice::tests::rdf_parser::turtle_official_evaluation_tests \ No newline at end of file diff --git a/tests/TurtleOfficialNegativeEvaluationTests.cpp b/tests/TurtleOfficialNegativeEvaluationTests.cpp index 1c66bad..352bf5e 100644 --- a/tests/TurtleOfficialNegativeEvaluationTests.cpp +++ b/tests/TurtleOfficialNegativeEvaluationTests.cpp @@ -1,30 +1 @@ -#include -#include - -TEST(TurtleOfficialNegativeEvaluationTests, turtle_eval_bad_01) { - TurtlePegtlStates::State turtle_state; - TurtlePegtlParser<>::parseStringwithAction("# Bad IRI : good escape, bad 
charcater\n" - " .", - turtle_state); -} - -TEST(TurtleOfficialNegativeEvaluationTests, turtle_eval_bad_02) { - TurtlePegtlStates::State turtle_state; - TurtlePegtlParser<>::parseStringwithAction("# Bad IRI : hex 3C is <\n" - " .", - turtle_state); -} - -TEST(TurtleOfficialNegativeEvaluationTests, turtle_eval_bad_03) { - TurtlePegtlStates::State turtle_state; - TurtlePegtlParser<>::parseStringwithAction("# Bad IRI : hex 3E is >\n" - " .", - turtle_state); -} - -TEST(TurtleOfficialNegativeEvaluationTests, turtle_eval_bad_04) { - TurtlePegtlStates::State turtle_state; - TurtlePegtlParser<>::parseStringwithAction("# Bad IRI\n" - " .", - turtle_state); -} \ No newline at end of file +//ToDo \ No newline at end of file diff --git a/tests/TurtleOfficialNegativeTests.cpp b/tests/TurtleOfficialNegativeTests.cpp index 43dbafa..5b3077c 100644 --- a/tests/TurtleOfficialNegativeTests.cpp +++ b/tests/TurtleOfficialNegativeTests.cpp @@ -1,584 +1,585 @@ #include -#include -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_base_01) { +#include - TurtleParser parser("@base ."); - ASSERT_EQ(parser.isContentParsable(), false); +namespace Dice::tests::rdf_parser::turtle_official_negative_tests { + using namespace Dice::rdf_parser; -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_base_01) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_base_02) { + ASSERT_EQ(TurtleStringParser::isParsable("@base ."), false); + } - TurtleParser parser( - "@BASE ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_base_02) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_base_03) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@BASE ."), + false); + } - TurtleParser parser( - "BASE .\n" - "

."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_base_03) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_blank_label_dot_end) { + ASSERT_EQ(TurtleStringParser::isParsable( + "BASE .\n" + "

."), + false); + } - TurtleParser parser( - "_:b1. :p :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_blank_label_dot_end) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_esc_01) { + ASSERT_EQ(TurtleStringParser::isParsable( + "_:b1. :p :o ."), + false); + } - TurtleParser parser( - " \"a\\zb\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_esc_01) { -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_esc_02) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \"a\\zb\" ."), + false); + } - TurtleParser parser( - " \"\\uWXYZ\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_esc_02) { -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_esc_03) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\\uWXYZ\" ."), + false); + } - TurtleParser parser( - " \"\\U0000WXYZ\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_esc_03) { -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_esc_04) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\\U0000WXYZ\" ."), + false); + } - TurtleParser parser( - " \"\\U0000WXYZ\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_esc_04) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_01) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\\U0000WXYZ\" ."), + false); + } - TurtleParser parser("@prefix : .\n" - ":s A :C ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_01) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_02) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s A :C ."), + false); + } - TurtleParser parser("@prefix : .\n" - "a :p :o ."); - 
ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_02) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_03) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "a :p :o ."), + false); + } - TurtleParser parser("@prefix : .\n" - ":s :p a ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_03) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_04) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p a ."), + false); + } - TurtleParser parser("@prefix : .\n" - "true :p :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_04) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_05) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "true :p :o ."), + false); + } - TurtleParser parser("@prefix : .\n" - ":s true :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_kw_05) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_lang_01) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s true :o ."), + false); + } - TurtleParser parser( - " \"string\"@1 ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_lang_01) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_LITERAL2_with_langtag_and_datatype) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \"string\"@1 ."), + false); + } - TurtleParser parser( - " \"value\"@en^^ ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_LITERAL2_with_langtag_and_datatype) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ln_dash_start) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \"value\"@en^^ ."), + false); + } - TurtleParser parser("@prefix : .\n" - ":s :p :-o ."); - 
ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ln_dash_start) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ln_escape) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p :-o ."), + false); + } - TurtleParser parser("@prefix : .\n" - ":s :p :o%2 ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ln_escape) { -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_ln_escape_start) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p :o%2 ."), + false); + } - TurtleParser parser("@prefix : .\n" - ":s :p :%2o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_ln_escape_start) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_missing_ns_dot_end) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p :%2o ."), + false); + } - TurtleParser parser("valid:s valid:p invalid.:o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_missing_ns_dot_end) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_missing_ns_dot_start) { + ASSERT_EQ(TurtleStringParser::isParsable("valid:s valid:p invalid.:o ."), false); + } - TurtleParser parser(".undefined:s .undefined:p .undefined:o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_missing_ns_dot_start) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_01) { + ASSERT_EQ(TurtleStringParser::isParsable(".undefined:s .undefined:p .undefined:o ."), false); + } - TurtleParser parser( - "@prefix : .\n" - "\n" - "{ :a :q :c . 
} :p :z .\n" - ""); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_01) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_02) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + "\n" + "{ :a :q :c . } :p :z .\n" + ""), + false); + } - TurtleParser parser("# = is not Turtle\n" - "@prefix : .\n" - "\n" - ":a = :b ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_02) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_03) { + ASSERT_EQ(TurtleStringParser::isParsable("# = is not Turtle\n" + "@prefix : .\n" + "\n" + ":a = :b ."), + false); + } - TurtleParser parser( - "@prefix : .\n" - "@prefix ns: .\n" - "\n" - ":x.\n" - " ns:p.\n" - " ns:q :p :z ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_03) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_04) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + "@prefix ns: .\n" + "\n" + ":x.\n" + " ns:p.\n" + " ns:q :p :z ."), + false); + } - TurtleParser parser( - "@prefix : .\n" - "@prefix ns: .\n" - "\n" - ":x^ns:p :p :z ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_04) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_05) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + "@prefix ns: .\n" + "\n" + ":x^ns:p :p :z ."), + false); + } - TurtleParser parser( - "@prefix : .\n" - "\n" - ":z is :p of :x ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_05) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_06) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + "\n" + ":z is :p of :x ."), + false); + } - TurtleParser parser( - "@prefix : .\n" - "\n" - 
":a.:b.:c ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_06) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_07) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + "\n" + ":a.:b.:c ."), + false); + } - TurtleParser parser( - "@keywords a .\n" - "x a Item ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_07) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_08) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@keywords a .\n" + "x a Item ."), + false); + } - TurtleParser parser( - "@keywords a .\n" - "x a Item ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_08) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_09) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@keywords a .\n" + "x a Item ."), + false); + } - TurtleParser parser( - "@prefix : .\n" - ":s => :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_09) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_10) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + ":s => :o ."), + false); + } - TurtleParser parser( - "@prefix : .\n" - ":s <= :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} - -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_11) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_10) { - TurtleParser parser( - "@prefix : .\n" - "@forSome :x ."); - ASSERT_EQ(parser.isContentParsable(), false); -} - -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_12) { - - TurtleParser parser( - "@prefix : .\n" - "@forAll :x ."); - ASSERT_EQ(parser.isContentParsable(), false); -} - -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_13) { + ASSERT_EQ(TurtleStringParser::isParsable( + 
"@prefix : .\n" + ":s <= :o ."), + false); + } - TurtleParser parser( - "@keywords .\n" - "x @a Item ."); - ASSERT_EQ(parser.isContentParsable(), false); -} - -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ns_dot_end) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_11) { - TurtleParser parser("@prefix eg. : .\n" - "eg.:s eg.:p eg.:o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} - -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ns_dot_start) { + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + "@forSome :x ."), + false); + } + + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_12) { + + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + "@forAll :x ."), + false); + } + + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_n3_extras_13) { - TurtleParser parser("@prefix .eg : .\n" - ".eg:s .eg:p .eg:o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@keywords .\n" + "x @a Item ."), + false); + } + + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ns_dot_end) { -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_01) { + ASSERT_EQ(TurtleStringParser::isParsable("@prefix eg. 
: .\n" + "eg.:s eg.:p eg.:o ."), + false); + } + + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_ns_dot_start) { - TurtleParser parser( - " 123.abc ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix .eg : .\n" + ".eg:s .eg:p .eg:o ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_02) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_01) { - TurtleParser parser( - " 123e ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " 123.abc ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_03) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_02) { - TurtleParser parser( - " 123abc ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " 123e ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_04) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_03) { - TurtleParser parser( - " 0x123 ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " 123abc ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_05) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_04) { - TurtleParser parser( - " +-1 ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " 0x123 ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_number_dot_in_anon) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_num_05) { - TurtleParser parser("@prefix : .\n" - "\n" - ":s\n" - "\t:p [\n" - "\t\t:p1 27.\n" - "\t] ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " +-1 ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_pname_01) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_number_dot_in_anon) { - TurtleParser parser( - 
"@prefix : .\n" - ":a~b :p :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "\n" + ":s\n" + "\t:p [\n" + "\t\t:p1 27.\n" + "\t] ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_pname_02) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_pname_01) { - TurtleParser parser( - "@prefix : .\n" - ":a%2 :p :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + ":a~b :p :o ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_pname_03) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_pname_02) { - TurtleParser parser( - "@prefix : .\n" - ":a\\u0039 :p :o ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + ":a%2 :p :o ."), + false); + } -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_prefix_01) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_pname_03) { - TurtleParser parser( - ":s \"x\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + ":a\\u0039 :p :o ."), + false); + } -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_prefix_02) { + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_prefix_01) { - TurtleParser parser( - "@prefix rdf: .\n" - " rdf:type :C ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + ":s \"x\" ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_prefix_03) { + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_prefix_02) { - TurtleParser parser( - "@prefix ex: ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix rdf: .\n" + " rdf:type :C ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_prefix_04) { + 
TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_prefix_03) { - TurtleParser parser( - "@prefix ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix ex: ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_prefix_05) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_prefix_04) { - TurtleParser parser( - "@prefix x ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_01) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_prefix_05) { - TurtleParser parser("@prefix : .\n" - ":s :p \"abc' ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix x ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_02) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_01) { - TurtleParser parser("@prefix : .\n" - ":s :p 'abc\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p \"abc' ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_03) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_02) { - TurtleParser parser("@prefix : .\n" - ":s :p '''abc' ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p 'abc\" ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_04) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_03) { - TurtleParser parser("@prefix : .\n" - ":s :p \"\"\"abc''' ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p '''abc' ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_05) { + TEST(TurtleOfficialNegativeTests, 
turtle_syntax_bad_string_04) { - TurtleParser parser( - "@prefix : .\n" - ":s :p \"\"\"abc\n" - "def"); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p \"\"\"abc''' ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_06) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_05) { - TurtleParser parser( - "@prefix : .\n" - ":s :p \"\"\"abc\"\"\"\"@en ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + ":s :p \"\"\"abc\n" + "def"), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_07) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_06) { - TurtleParser parser( - "@prefix : .\n" - ":s :p '''abc''''@en ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + ":s :p \"\"\"abc\"\"\"\"@en ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_01) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_string_07) { - TurtleParser parser( - "{ }"); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "@prefix : .\n" + ":s :p '''abc''''@en ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_02) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_01) { - TurtleParser parser( - " = ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "{ }"), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_03) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_02) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " = ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_04) { + TEST(TurtleOfficialNegativeTests, 
turtle_syntax_bad_struct_03) { - TurtleParser parser( - "\"hello\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_05) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_04) { - TurtleParser parser( - " \"hello\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "\"hello\" ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_06) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_05) { - TurtleParser parser( - " [] ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"hello\" ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_07) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_06) { - TurtleParser parser( - " _:p ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " [] ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_08) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_07) { - TurtleParser parser( - " "); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " _:p ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_09) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_08) { - TurtleParser parser( - " . ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " "), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_10) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_09) { - TurtleParser parser( - " . .\n" - " ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " . 
."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_11) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_10) { - TurtleParser parser( - " ;"); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " . .\n" + " ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_12) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_11) { - TurtleParser parser( - " "); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ;"), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_13) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_12) { - TurtleParser parser( - " "); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " "), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_14) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_13) { - TurtleParser parser( - "\"abc\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " "), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_15) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_14) { - TurtleParser parser( - " \"abc\" ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + "\"abc\" ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_16) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_15) { - TurtleParser parser( - " [] ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"abc\" ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_17) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_16) { - TurtleParser parser( - " _:a ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + 
ASSERT_EQ(TurtleStringParser::isParsable( + " [] ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_uri_01) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_struct_17) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " _:a ."), + false); + } -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_uri_02) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_uri_01) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + false); + } -TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_uri_03) { + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_uri_02) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_uri_04) { + TEST(TurtleOfficialNegativeTests, DISABLED_turtle_syntax_bad_uri_03) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + false); + } -TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_uri_05) { + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_uri_04) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), false); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + false); + } + + TEST(TurtleOfficialNegativeTests, turtle_syntax_bad_uri_05) { + + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + false); + } +}// namespace Dice::tests::rdf_parser::turtle_official_negative_tests \ No newline at end of file diff --git a/tests/TurtleOfficialPositiveTests.cpp b/tests/TurtleOfficialPositiveTests.cpp index fd16542..5e0a4d4 100644 --- a/tests/TurtleOfficialPositiveTests.cpp +++ b/tests/TurtleOfficialPositiveTests.cpp @@ -1,864 +1,819 @@ #include -#include +#include 
-TEST(TurtleOfficialPositiveTests, anonymous_blank_node_object) { +namespace Dice::tests::rdf_parser::turtle_official_positive_tests { + using namespace Dice::rdf_parser; - TurtleParser parser(" [] ."); - ASSERT_EQ(parser.isContentParsable(), true); + TEST(TurtleOfficialPositiveTests, anonymous_blank_node_object) { + ASSERT_EQ(TurtleStringParser::isParsable(" [] ."), true); + } -} - -TEST(TurtleOfficialPositiveTests, anonymous_blank_node_subject) { - - TurtleParser parser("[] ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + TEST(TurtleOfficialPositiveTests, anonymous_blank_node_subject) { + ASSERT_EQ(TurtleStringParser::isParsable("[] ."), true); + } -TEST(TurtleOfficialPositiveTests, bareword_integer) { - TurtleParser parser(" 1 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} - -TEST(TurtleOfficialPositiveTests, comment_following_localName) { + TEST(TurtleOfficialPositiveTests, bareword_integer) { - TurtleParser parser("@prefix p: .\n" - " p:o#comment\n" - "."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable(" 1 ."), true); + } -TEST(TurtleOfficialPositiveTests, default_namespace_IRI) { + TEST(TurtleOfficialPositiveTests, comment_following_localName) { - TurtleParser parser("@prefix : .\n" - ":s ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix p: .\n" + " p:o#comment\n" + "."), + true); + } -TEST(TurtleOfficialPositiveTests, IRI_spo) { + TEST(TurtleOfficialPositiveTests, default_namespace_IRI) { - TurtleParser parser(" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s ."), + true); + } -TEST(TurtleOfficialPositiveTests, IRI_subject) { + TEST(TurtleOfficialPositiveTests, IRI_spo) { - TurtleParser parser(" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable(" ."), true); + } -TEST(TurtleOfficialPositiveTests, 
IRI_with_eight_digit_numeric_escape) { + TEST(TurtleOfficialPositiveTests, IRI_subject) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable(" ."), true); + } -TEST(TurtleOfficialPositiveTests, IRI_with_four_digit_numeric_escape) { + TEST(TurtleOfficialPositiveTests, IRI_with_eight_digit_numeric_escape) { - TurtleParser parser(" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_leading_digit) { + TEST(TurtleOfficialPositiveTests, IRI_with_four_digit_numeric_escape) { - TurtleParser parser(" _:0 ."); - ASSERT_EQ(parser.isContentParsable(), true); + ASSERT_EQ(TurtleStringParser::isParsable(" ."), true); + } -} + TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_leading_digit) { -TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_leading_underscore) { + ASSERT_EQ(TurtleStringParser::isParsable(" _:0 ."), true); + } - TurtleParser parser(" _:_ ."); - ASSERT_EQ(parser.isContentParsable(), true); + TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_leading_underscore) { -} + ASSERT_EQ(TurtleStringParser::isParsable(" _:_ ."), true); + } -TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_non_leading_extras) { + TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_non_leading_extras) { - TurtleParser parser(" _:a·̀ͯ‿.⁀ ."); - ASSERT_EQ(parser.isContentParsable(), true); + ASSERT_EQ(TurtleStringParser::isParsable(" _:a·̀ͯ‿.⁀ ."), true); + } -} + /*TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_PN_CHARS_BASE_character_boundaries) { -/*TEST(TurtleOfficialPositiveTests, labeled_blank_node_with_PN_CHARS_BASE_character_boundaries) { - - TurtleParserparser( - " _:AZazÀÖØöø˿Ͱͽ\u037F\u1FFF\u200C\u200D⁰\u218FⰀ\u2FEF、\uD7FF豈\uFDCFﷰ�\uD800\uDC00\uDB7F\uDFFD ."); - ASSERT_EQ(parser.isContentParsable(), true); + 
RdfStringRdfStringParserRdfStringParser( + " _:AZazÀÖØöø˿Ͱͽ\u037F\u1FFF\u200C\u200D⁰\u218FⰀ\u2FEF、\uD7FF豈\uFDCFﷰ�\uD800\uDC00\uDB7F\uDFFD ."),true); + }*/ -TEST(TurtleOfficialPositiveTests, langtagged_LONG) { + TEST(TurtleOfficialPositiveTests, langtagged_LONG) { - TurtleParser parser(" \"\"\"chat\"\"\"@en ."); - ASSERT_EQ(parser.isContentParsable(), true); + ASSERT_EQ(TurtleStringParser::isParsable(" \"\"\"chat\"\"\"@en ."), true); + } -} + TEST(TurtleOfficialPositiveTests, LITERAL1_all_controls) { -TEST(TurtleOfficialPositiveTests, LITERAL1_all_controls) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\\t\\u000B\\u000C\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F\" ."), + true); + } - TurtleParser parser( - " \"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\\t\\u000B\\u000C\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F\" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + /*TEST(TurtleOfficialPositiveTests, LITERAL1_with_UTF8_boundaries) { -/*TEST(TurtleOfficialPositiveTests, LITERAL1_with_UTF8_boundaries) { - - TurtleParserparser( - " '\u0080\u07FFࠀ\u0FFFက쿿퀀\uD7FF\uE000�\uD800\uDC00\uD8BF\uDFFD\uD8C0\uDC00\uDBBF\uDFFD\uDBC0\uDC00\uDBFF\uDFFD' ."); - ASSERT_EQ(parser.isContentParsable(), true); + RdfStringRdfStringParserRdfStringParser( + " '\u0080\u07FFࠀ\u0FFFက쿿퀀\uD7FF\uE000�\uD800\uDC00\uD8BF\uDFFD\uD8C0\uDC00\uDBBF\uDFFD\uDBC0\uDC00\uDBFF\uDFFD' ."),true); + }*/ -TEST(TurtleOfficialPositiveTests, LITERAL2) { - - TurtleParser parser(" \"x\" ."); - ASSERT_EQ(parser.isContentParsable(), true); - -} - -TEST(TurtleOfficialPositiveTests, LITERAL2_ascii_boundaries) { - - TurtleParser parser( - " \"\\u0000\\t\\u000B\\u000C\\u000E!#[]\\u007F\" ."); - ASSERT_EQ(parser.isContentParsable(), true); + 
TEST(TurtleOfficialPositiveTests, LITERAL2) { -} + ASSERT_EQ(TurtleStringParser::isParsable(" \"x\" ."), true); + } -TEST(TurtleOfficialPositiveTests, LITERAL2_with_UTF8_boundaries) { + TEST(TurtleOfficialPositiveTests, LITERAL2_ascii_boundaries) { - TurtleParser parser( - " \" \t\u000B\f\u000E!#[]\u007F\" ."); - ASSERT_EQ(parser.isContentParsable(), true); + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\\u0000\\t\\u000B\\u000C\\u000E!#[]\\u007F\" ."), + true); + } -} + TEST(TurtleOfficialPositiveTests, LITERAL2_with_UTF8_boundaries) { -TEST(TurtleOfficialPositiveTests, LITERAL_LONG1) { + ASSERT_EQ(TurtleStringParser::isParsable( + " \" \t\u000B\f\u000E!#[]\u007F\" ."), + true); + } - TurtleParser parser(" '''x''' ."); - ASSERT_EQ(parser.isContentParsable(), true); + TEST(TurtleOfficialPositiveTests, LITERAL_LONG1) { -} -/* + ASSERT_EQ(TurtleStringParser::isParsable(" '''x''' ."), true); + } + /* TEST(TurtleOfficialPositiveTests, LITERAL_LONG1_with_UTF8_boundaries) { - - TurtleParserparser( - " '''\u0080\u07FFࠀ\u0FFFက쿿퀀\uD7FF\uE000�\uD800\uDC00\uD8BF\uDFFD\uD8C0\uDC00\uDBBF\uDFFD\uDBC0\uDC00\uDBFF\uDFFD''' ."); - ASSERT_EQ(parser.isContentParsable(), true); + + RdfStringRdfStringParserRdfStringParser( + " '''\u0080\u07FFࠀ\u0FFFက쿿퀀\uD7FF\uE000�\uD800\uDC00\uD8BF\uDFFD\uD8C0\uDC00\uDBBF\uDFFD\uDBC0\uDC00\uDBFF\uDFFD''' ."),true); + }*/ -TEST(TurtleOfficialPositiveTests, LITERAL_LONG2) { + TEST(TurtleOfficialPositiveTests, LITERAL_LONG2) { - TurtleParser parser(" \"\"\"x\"\"\" ."); - ASSERT_EQ(parser.isContentParsable(), true); + ASSERT_EQ(TurtleStringParser::isParsable(" \"\"\"x\"\"\" ."), true); + } -} + /*TEST(TurtleOfficialPositiveTests, LITERAL_LONG2_with_UTF8_boundaries) { -/*TEST(TurtleOfficialPositiveTests, LITERAL_LONG2_with_UTF8_boundaries) { - - TurtleParserparser(" \"\"\"\u0080\u07FFࠀ\u0FFFက쿿퀀\uD7FF\uE000�\uD800\uDC00\uD8BF\uDFFD\uD8C0\uDC00\uDBBF\uDFFD\uDBC0\uDC00\uDBFF\uDFFD\"\"\" ."); - ASSERT_EQ(parser.isContentParsable(), true); + 
RdfStringRdfStringParserRdfStringParser(" \"\"\"\u0080\u07FFࠀ\u0FFFက쿿퀀\uD7FF\uE000�\uD800\uDC00\uD8BF\uDFFD\uD8C0\uDC00\uDBBF\uDFFD\uDBC0\uDC00\uDBFF\uDFFD\"\"\" ."),true); + }*/ -TEST(TurtleOfficialPositiveTests, literal_with_escaped_BACKSPACE) { - - TurtleParser parser(" '\\b' ."); - ASSERT_EQ(parser.isContentParsable(), true); - -} - -TEST(TurtleOfficialPositiveTests, literal_with_escaped_CARRIAGE_RETURN) { - - TurtleParser parser(" '\\r' ."); - ASSERT_EQ(parser.isContentParsable(), true); + TEST(TurtleOfficialPositiveTests, literal_with_escaped_BACKSPACE) { -} + ASSERT_EQ(TurtleStringParser::isParsable(" '\\b' ."), true); + } -TEST(TurtleOfficialPositiveTests, literal_with_escaped_CHARACTER_TABULATION) { + TEST(TurtleOfficialPositiveTests, literal_with_escaped_CARRIAGE_RETURN) { - TurtleParser parser(" '\\t' ."); - ASSERT_EQ(parser.isContentParsable(), true); + ASSERT_EQ(TurtleStringParser::isParsable(" '\\r' ."), true); + } -} + TEST(TurtleOfficialPositiveTests, literal_with_escaped_CHARACTER_TABULATION) { -TEST(TurtleOfficialPositiveTests, literal_with_escaped_FORM_FEED) { + ASSERT_EQ(TurtleStringParser::isParsable(" '\\t' ."), true); + } - TurtleParser parser(" '\\f' ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + TEST(TurtleOfficialPositiveTests, literal_with_escaped_FORM_FEED) { -TEST(TurtleOfficialPositiveTests, literal_with_escaped_LINE_FEED) { + ASSERT_EQ(TurtleStringParser::isParsable(" '\\f' ."), true); + } - TurtleParser parser(" '\\n' ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + TEST(TurtleOfficialPositiveTests, literal_with_escaped_LINE_FEED) { -TEST(TurtleOfficialPositiveTests, literal_with_numeric_escape8) { + ASSERT_EQ(TurtleStringParser::isParsable(" '\\n' ."), true); + } - TurtleParser parser(" '\\U0000006F' ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + TEST(TurtleOfficialPositiveTests, literal_with_numeric_escape8) { -TEST(TurtleOfficialPositiveTests, LITERAL_with_UTF8_boundaries) { + 
ASSERT_EQ(TurtleStringParser::isParsable(" '\\U0000006F' ."), true); + } - TurtleParser parser( - " \"\\u0080\\u07FF\\u0800\\u0FFF\\u1000\\uCFFF\\uD000\\uD7FF\\uE000\\uFFFD\\U00010000\\U0003FFFD\\U00040000\\U000FFFFD\\U00100000\\U0010FFFD\" ."); - ASSERT_EQ(parser.isContentParsable(), true); + TEST(TurtleOfficialPositiveTests, LITERAL_with_UTF8_boundaries) { -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\\u0080\\u07FF\\u0800\\u0FFF\\u1000\\uCFFF\\uD000\\uD7FF\\uE000\\uFFFD\\U00010000\\U0003FFFD\\U00040000\\U000FFFFD\\U00100000\\U0010FFFD\" ."), + true); + } -TEST(TurtleOfficialPositiveTests, old_style_base) { + TEST(TurtleOfficialPositiveTests, old_style_base) { - TurtleParser parser("@base .\n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@base .\n" + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, old_style_prefix) { + TEST(TurtleOfficialPositiveTests, old_style_prefix) { - TurtleParser parser("@prefix p: .\n" - "p:s ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix p: .\n" + "p:s ."), + true); + } -TEST(TurtleOfficialPositiveTests, prefix_only_IRI) { + TEST(TurtleOfficialPositiveTests, prefix_only_IRI) { - TurtleParser parser("@prefix p: .\n" - "p: ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix p: .\n" + "p: ."), + true); + } -TEST(TurtleOfficialPositiveTests, prefix_with_non_leading_extras) { + TEST(TurtleOfficialPositiveTests, prefix_with_non_leading_extras) { - TurtleParser parser("@prefix a·̀ͯ‿.⁀: .\n" - "a·̀ͯ‿.⁀:s ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix a·̀ͯ‿.⁀: .\n" + "a·̀ͯ‿.⁀:s ."), + true); + } -/* + /* TEST(TurtleOfficialPositiveTests, prefix_with_PN_CHARS_BASE_character_boundaries) { - - TurtleParserparser("@prefix 
AZazÀÖØöø˿Ͱͽ\u037F\u1FFF\u200C\u200D⁰\u218FⰀ\u2FEF、\uD7FF豈\uFDCFﷰ�\uD800\uDC00\uDB7F\uDFFD: .\n" - " AZazÀÖØöø˿Ͱͽ\u037F\u1FFF\u200C\u200D⁰\u218FⰀ\u2FEF、\uD7FF豈\uFDCFﷰ�\uD800\uDC00\uDB7F\uDFFD:o ."); - ASSERT_EQ(parser.isContentParsable(), true); + + RdfStringRdfStringParserRdfStringParser("@prefix AZazÀÖØöø˿Ͱͽ\u037F\u1FFF\u200C\u200D⁰\u218FⰀ\u2FEF、\uD7FF豈\uFDCFﷰ�\uD800\uDC00\uDB7F\uDFFD: .\n" + " AZazÀÖØöø˿Ͱͽ\u037F\u1FFF\u200C\u200D⁰\u218FⰀ\u2FEF、\uD7FF豈\uFDCFﷰ�\uD800\uDC00\uDB7F\uDFFD:o ."),true); + } */ -TEST(TurtleOfficialPositiveTests, prefixed_IRI_object) { + TEST(TurtleOfficialPositiveTests, prefixed_IRI_object) { - TurtleParser parser("@prefix p: .\n" - " p:o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix p: .\n" + " p:o ."), + true); + } -TEST(TurtleOfficialPositiveTests, prefixed_IRI_predicate) { + TEST(TurtleOfficialPositiveTests, prefixed_IRI_predicate) { - TurtleParser parser("@prefix p: .\n" - " p:p ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix p: .\n" + " p:p ."), + true); + } -TEST(TurtleOfficialPositiveTests, prefixed_name_datatype) { + TEST(TurtleOfficialPositiveTests, prefixed_name_datatype) { - TurtleParser parser("@prefix xsd: .\n" - " \"1\"^^xsd:integer ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix xsd: .\n" + " \"1\"^^xsd:integer ."), + true); + } -TEST(TurtleOfficialPositiveTests, repeated_semis_at_end) { + TEST(TurtleOfficialPositiveTests, repeated_semis_at_end) { - TurtleParser parser( - " ;; ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ;; ."), + true); + } -TEST(TurtleOfficialPositiveTests, sole_blankNodePropertyList) { + TEST(TurtleOfficialPositiveTests, sole_blankNodePropertyList) { - TurtleParser parser("[ ] ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + 
ASSERT_EQ(TurtleStringParser::isParsable("[ ] ."), true); + } -TEST(TurtleOfficialPositiveTests, SPARQL_style_base) { + TEST(TurtleOfficialPositiveTests, SPARQL_style_base) { - TurtleParser parser("BASE \n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("BASE \n" + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, SPARQL_style_prefix) { + TEST(TurtleOfficialPositiveTests, SPARQL_style_prefix) { - TurtleParser parser("PREFIX p: \n" - "p:s ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("PREFIX p: \n" + "p:s ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_base_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_base_01) { - TurtleParser parser("@base ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@base ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_base_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_base_02) { - TurtleParser parser("BASE "); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("BASE "), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_base_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_base_03) { - TurtleParser parser("@base .\n" - "

."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@base .\n" + "

."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_base_04) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_base_04) { - TurtleParser parser("base \n" - "

."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("base \n" + "

."), + true); + } -TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_blank_label) { + TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_blank_label) { - TurtleParser parser("@prefix : .\n" - "_:0b :p :o . # Starts with digit\n" - "_:_b :p :o . # Starts with underscore\n" - "_:b.0 :p :o . # Contains dot, ends with digit"); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "_:0b :p :o . # Starts with digit\n" + "_:_b :p :o . # Starts with underscore\n" + "_:b.0 :p :o . # Contains dot, ends with digit"), + true); + } -TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_blank_label_TEST) { + TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_blank_label_TEST) { - TurtleParser parser("@prefix : .\n" - "_:0b :p :o . # Starts with digit\n" - "_:_b :p :o . # Starts with underscore\n" - "_:b.0 :p :o . # Contains dot, ends with digit"); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "_:0b :p :o . # Starts with digit\n" + "_:_b :p :o . # Starts with underscore\n" + "_:b.0 :p :o . 
# Contains dot, ends with digit"), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_01) { - TurtleParser parser("@prefix : .\n" - "[] :p :o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "[] :p :o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_02) { - TurtleParser parser("@prefix : .\n" - ":s :p [] ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p [] ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_03) { - TurtleParser parser("@prefix : .\n" - ":s :p [ :q :o ] ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p [ :q :o ] ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_04) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_04) { - TurtleParser parser("@prefix : .\n" - ":s :p [ :q1 :o1 ; :q2 :o2 ] ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p [ :q1 :o1 ; :q2 :o2 ] ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_05) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_05) { - TurtleParser parser("@prefix : .\n" - "[ :q1 :o1 ; :q2 :o2 ] :p :o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "[ :q1 :o1 ; :q2 :o2 ] :p :o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_06) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_06) { - TurtleParser parser("@prefix : .\n" - "_:a :p :o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + 
ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "_:a :p :o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_07) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_07) { - TurtleParser parser("@prefix : .\n" - ":s :p _:a .\n" - "_:a :p :o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p _:a .\n" + "_:a :p :o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_08) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_08) { - TurtleParser parser("@prefix : .\n" - "[ :p :o ] ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "[ :p :o ] ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_09) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_09) { - TurtleParser parser("@prefix : .\n" - "[ :p :o1,:2 ] .\n" - ":s :p :o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "[ :p :o1,:2 ] .\n" + ":s :p :o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_10) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_bnode_10) { - TurtleParser parser("@prefix : .\n" - "\n" - ":s1 :p :o .\n" - "[ :p1 :o1 ; :p2 :o2 ] .\n" - ":s2 :p :o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "\n" + ":s1 :p :o .\n" + "[ :p1 :o1 ; :p2 :o2 ] .\n" + ":s2 :p :o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_datatypes_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_datatypes_01) { - TurtleParser parser("@prefix xsd: .\n" - "

\"123\"^^xsd:byte ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix xsd: .\n" + "

\"123\"^^xsd:byte ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_datatypes_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_datatypes_02) { - TurtleParser parser("@prefix rdf: .\n" - "@prefix xsd: .\n" - "

\"123\"^^xsd:string ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix rdf: .\n" + "@prefix xsd: .\n" + "

\"123\"^^xsd:string ."), + true); + } -TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_file_01) { + TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_file_01) { - TurtleParser parser(""); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable(""), true); + } -TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_file_02) { + TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_file_02) { - TurtleParser parser("#Empty file."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("#Empty file."), true); + } -TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_file_03) { + TEST(TurtleOfficialPositiveTests, DISABLED_turtle_syntax_file_03) { - TurtleParser parser("#One comment, one empty line.\n" - ""); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("#One comment, one empty line.\n" + ""), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_kw_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_kw_01) { - TurtleParser parser("

true ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

true ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_kw_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_kw_02) { - TurtleParser parser("

false ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

false ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_kw_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_kw_03) { - TurtleParser parser("@prefix : .\n" - ":s a :C ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s a :C ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_01) { - TurtleParser parser("@prefix : .\n" - ":s :p () ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p () ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_02) { - TurtleParser parser("@prefix : .\n" - ":s :p (1 \"2\" :o) ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p (1 \"2\" :o) ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_03) { - TurtleParser parser("@prefix : .\n" - "(1) :p (1) ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "(1) :p (1) ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_04) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_04) { - TurtleParser parser("@prefix : .\n" - "(()) :p (()) ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "(()) :p (()) ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_05) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_lists_05) { - TurtleParser parser("@prefix : .\n" - "(1 2 (1 2)) :p (( \"a\") \"b\" :o) ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + "(1 2 (1 2)) :p (( \"a\") \"b\" :o) ."), + true); + } 
-TEST(TurtleOfficialPositiveTests, turtle_syntax_ln_colons) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_ln_colons) { - TurtleParser parser("@prefix : .\n" - ":s:1 :p:1 :o:1 .\n" - ":s::2 :p::2 :o::2 .\n" - ":3:s :3:p :3 .\n" - "::s ::p ::o .\n" - "::s: ::p: ::o: ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s:1 :p:1 :o:1 .\n" + ":s::2 :p::2 :o::2 .\n" + ":3:s :3:p :3 .\n" + "::s ::p ::o .\n" + "::s: ::p: ::o: ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_ln_dots) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_ln_dots) { - TurtleParser parser("@prefix : .\n" - ":s.1 :p.1 :o.1 .\n" - ":s..2 :p..2 :o..2.\n" - ":3.s :3.p :3."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s.1 :p.1 :o.1 .\n" + ":s..2 :p..2 :o..2.\n" + ":3.s :3.p :3."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_ns_dots) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_ns_dots) { - TurtleParser parser("@prefix e.g: .\n" - "e.g:s e.g:p e.g:o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix e.g: .\n" + "e.g:s e.g:p e.g:o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_01) { - TurtleParser parser("

123 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

123 ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_02) { - TurtleParser parser("

-123 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

-123 ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_03) { - TurtleParser parser("

+123 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

+123 ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_04) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_04) { - TurtleParser parser("# This is a decimal.\n" - "

123.0 . "); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# This is a decimal.\n" + "

123.0 . "), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_05) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_05) { - TurtleParser parser("# This is a decimal.\n" - "

.1 . "); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# This is a decimal.\n" + "

.1 . "), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_06) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_06) { - TurtleParser parser("# This is a decimal.\n" - "

-123.0 . "); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# This is a decimal.\n" + "

-123.0 . "), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_07) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_07) { - TurtleParser parser("# This is a decimal.\n" - "

+123.0 . "); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# This is a decimal.\n" + "

+123.0 . "), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_08) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_08) { - TurtleParser parser("# This is an integer\n" - "

123."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# This is an integer\n" + "

123."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_09) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_09) { - TurtleParser parser("

123.0e1 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

123.0e1 ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_10) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_10) { - TurtleParser parser("

-123e-1 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

-123e-1 ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_number_11) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_number_11) { - TurtleParser parser("

123.E+1 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("

123.E+1 ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_pname_esc_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_pname_esc_01) { - TurtleParser parser("@prefix : .\n" - ":s :p :\\~\\.\\-\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\/\\?\\#\\@\\_\\%AA ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p :\\~\\.\\-\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\/\\?\\#\\@\\_\\%AA ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_pname_esc_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_pname_esc_02) { - TurtleParser parser("@prefix : .\n" - ":s :p :0123\\~\\.\\-\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\/\\?\\#\\@\\_\\%AA123 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p :0123\\~\\.\\-\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\/\\?\\#\\@\\_\\%AA123 ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_pname_esc_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_pname_esc_03) { - TurtleParser parser("@prefix : .\n" - ":xyz\\~ :abc\\.: : ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":xyz\\~ :abc\\.: : ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_01) { - TurtleParser parser("@prefix : ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : ."), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_02) { - TurtleParser parser("PreFIX : "); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("PreFIX : "), true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_03) { - TurtleParser parser("PREFIX : \n" 
- ":s :p :123 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("PREFIX : \n" + ":s :p :123 ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_04) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_04) { - TurtleParser parser("@prefix : .\n" - ":s :p :%20 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p :%20 ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_05) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_05) { - TurtleParser parser("@prefix : .\n" - ": : : ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ": : : ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_06) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_06) { - TurtleParser parser("# colon is a legal pname character\n" - "@prefix : .\n" - "@prefix x: .\n" - ":a:b:c x:d:e:f :::: ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# colon is a legal pname character\n" + "@prefix : .\n" + "@prefix x: .\n" + ":a:b:c x:d:e:f :::: ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_07) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_07) { - TurtleParser parser("# dash is a legal pname character\n" - "@prefix x: .\n" - "x:a-b-c x:p x:o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# dash is a legal pname character\n" + "@prefix x: .\n" + "x:a-b-c x:p x:o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_08) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_08) { - TurtleParser parser("# underscore is a legal pname character\n" - "@prefix x: .\n" - "x:_ x:p_1 x:o ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# 
underscore is a legal pname character\n" + "@prefix x: .\n" + "x:_ x:p_1 x:o ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_09) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_prefix_09) { - TurtleParser parser("# percents\n" - "@prefix : .\n" - "@prefix x: .\n" - ":a%3E x:%25 :a%3Eb ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# percents\n" + "@prefix : .\n" + "@prefix x: .\n" + ":a%3E x:%25 :a%3Eb ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_str_esc_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_str_esc_01) { - TurtleParser parser( - " \"a\\n\" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"a\\n\" ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_str_esc_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_str_esc_02) { - TurtleParser parser( - " \"a\\u0020b\" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"a\\u0020b\" ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_str_esc_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_str_esc_03) { - TurtleParser parser( - " \"a\\U00000020b\" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"a\\U00000020b\" ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_01) { - TurtleParser parser( - " \"string\" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"string\" ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_02) { - TurtleParser parser( - " \"string\"@en ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"string\"@en ."), + 
true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_03) { - TurtleParser parser( - " \"string\"@en-uk ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"string\"@en-uk ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_04) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_04) { - TurtleParser parser( - " 'string' ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " 'string' ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_05) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_05) { - TurtleParser parser( - " 'string'@en ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " 'string'@en ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_06) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_06) { - TurtleParser parser( - " 'string'@en-uk ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " 'string'@en-uk ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_07) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_07) { - TurtleParser parser( - " \"\"\"abc\"\"def''ghi\"\"\" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\"\"abc\"\"def''ghi\"\"\" ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_08) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_08) { - TurtleParser parser( - " \"\"\"abc\n" - "def\"\"\" ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\"\"abc\n" + "def\"\"\" ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_09) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_09) { - 
TurtleParser parser( - " '''abc\n" - "def''' ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " '''abc\n" + "def''' ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_10) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_10) { - TurtleParser parser( - " \"\"\"abc\n" - "def\"\"\"@en ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " \"\"\"abc\n" + "def\"\"\"@en ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_string_11) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_string_11) { - TurtleParser parser( - " '''abc\n" - "def'''@en ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " '''abc\n" + "def'''@en ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_01) { - TurtleParser parser("@prefix : .\n" - ":s :p :o1 , :o2 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p :o1 , :o2 ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_02) { - TurtleParser parser("@prefix : .\n" - ":s :p1 :o1 ;\n" - " :p2 :o2 ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p1 :o1 ;\n" + " :p2 :o2 ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_03) { - TurtleParser parser("@prefix : .\n" - ":s :p1 :o1 ;\n" - " :p2 :o2 ;\n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p1 :o1 ;\n" + " :p2 :o2 ;\n" + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_04) { + TEST(TurtleOfficialPositiveTests, 
turtle_syntax_struct_04) { - TurtleParser parser("@prefix : .\n" - ":s :p1 :o1 ;;\n" - " :p2 :o2 \n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p1 :o1 ;;\n" + " :p2 :o2 \n" + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_05) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_struct_05) { - TurtleParser parser("@prefix : .\n" - ":s :p1 :o1 ;\n" - " :p2 :o2 ;;\n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("@prefix : .\n" + ":s :p1 :o1 ;\n" + " :p2 :o2 ;;\n" + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_01) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_01) { - TurtleParser parser( - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable( + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_02) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_02) { - TurtleParser parser("# x53 is capital S\n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# x53 is capital S\n" + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_03) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_03) { - TurtleParser parser("# x53 is capital S\n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} + ASSERT_EQ(TurtleStringParser::isParsable("# x53 is capital S\n" + " ."), + true); + } -TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_04) { + TEST(TurtleOfficialPositiveTests, turtle_syntax_uri_04) { - TurtleParser parser("# IRI with all chars in it.\n" - " \n" - " ."); - ASSERT_EQ(parser.isContentParsable(), true); -} \ No newline at end of file + ASSERT_EQ(TurtleStringParser::isParsable("# IRI with all chars in it.\n" + " \n" + " ."), + true); + } +}// namespace Dice::tests::rdf_parser::turtle_official_positive_tests \ No newline 
at end of file diff --git a/tests/TurtleParserConcurrentTests.cpp b/tests/TurtleParserConcurrentTests.cpp deleted file mode 100644 index 6ccd16f..0000000 --- a/tests/TurtleParserConcurrentTests.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include - -namespace { - using namespace rdf_parser::Turtle; - using namespace rdf_parser::store::rdf; -} - - -TEST(TurtleParserConcurrentTests,ntripleFileCon1) { - CuncurrentStreamParser<> parser("../datasets/instances-labels.nt"); - auto it= parser.begin(); - while (it) { - it++; - //Do something - } - -} - -TEST(TurtleParserConcurrentTests,turtleFileCon1) { - CuncurrentStreamParser<> parser("../datasets/instance-types_transitive.ttl"); - auto it= parser.begin(); - while (it) - it++; - -} diff --git a/tests/TurtleParserFilesTests.cpp b/tests/TurtleParserFilesTests.cpp index 16ca47a..865125c 100644 --- a/tests/TurtleParserFilesTests.cpp +++ b/tests/TurtleParserFilesTests.cpp @@ -1,19 +1,18 @@ #include -#include -namespace { - using namespace rdf_parser::Turtle; - using namespace rdf_parser::store::rdf; -} +#include +namespace Dice::tests::rdf_parser::turtle_parser_concurrent_tests { + using namespace Dice::rdf_parser::Turtle::parsers; -TEST(TurtleParserFilesTests,ntripleFile1) { - TurtleParser<> parser("../datasets/g.nt"); - ASSERT_EQ(parser.isContentParsable(), true); -} - -TEST(TurtleParserFilesTests,turtleFile1) { - TurtleParser<> parser("datasets/dbpedia_2GB_subset.ttl"); - ASSERT_EQ(parser.isContentParsable(), true); - -} + TEST(TurtleParserFilesTests, parseSWDF) { + std::filesystem::current_path(std::filesystem::canonical("/proc/self/exe").parent_path()); + TurtleFileParser parser{"../tests/datasets/swdf.nt"}; + long i = 0; + for (const auto &item : parser) { + if (item.hash()) + i++; + } + ASSERT_TRUE(i > 0); + } +}// namespace Dice::tests::rdf_parser::turtle_parser_concurrent_tests \ No newline at end of file diff --git a/tests/TurtlePartialGrammerTests.cpp b/tests/TurtlePartialGrammerTests.cpp deleted file mode 100644 
index 07835f0..0000000 --- a/tests/TurtlePartialGrammerTests.cpp +++ /dev/null @@ -1,77 +0,0 @@ - -#include -#include -#include -#include -#include - - -using namespace rdf_parser::Turtle; - - -TEST(PatrialGrammerTest, F1) { - - rdf_parser::Turtle::parsers::StringParser parser("?g ?who . ") ; - auto it= parser.begin(); - while (it) - { - auto x=*it; - it++; - } -} - -TEST(PatrialGrammerTest, AddedprefixTest) { - - std::map prefixes; - prefixes.insert(std::pair("foaf","http://xmlns.com/foaf/0.1/")); - rdf_parser::Turtle::parsers::StringParser parser("?x foaf:name ?name .",prefixes) ; - auto it= parser.begin(); - while (it) - { - auto x=*it; - it++; - } -} - -TEST(PatrialGrammerTest, AddedprefixTest2) { - - std::map prefixes; - //prefixes.insert(std::pair("","http://example.org/book/")); - prefixes.insert(std::pair("dc","http://purl.org/dc/elements/1.1/")); - prefixes.insert(std::pair("ns","http://example.org/ns#")); - rdf_parser::Turtle::parsers::StringParser parser("?book dc:title ?title ;\n" - " ns:price ?price .",prefixes) ; - auto it= parser.begin(); - while (it) - { - auto x=*it; - it++; - } -} - - -TEST(PatrialGrammerTest, tripleBlock) { - - -rdf_parser::Turtle::parsers::StringParser parser("?x ?y .\n" - "?x ?nameX . ") ; -auto it= parser.begin(); -while (it) -{ -auto x=*it; -it++; -} -} - -TEST(PatrialGrammerTest, tripleBlock2) { - - std::map prefixes; - prefixes.insert(std::pair("ex","http://example.org/")); - rdf_parser::Turtle::parsers::StringParser parser("?buch ex:hatVerlag . ?buch ex:titel ?title . ?buch ex:autor ?autor . ",prefixes) ; - auto it= parser.begin(); - while (it) - { - auto x=*it; - it++; - } -} \ No newline at end of file