Skip to content

Commit

Permalink
add a few examples for sophia
Browse files Browse the repository at this point in the history
  • Loading branch information
pchampin committed Oct 24, 2023
1 parent 711936e commit 5433b9f
Show file tree
Hide file tree
Showing 6 changed files with 296 additions and 1 deletion.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ rio_xml = { version = "0.8" }
test-case = "3.1.0"
thiserror = "1.0.32"
tokio = { version="1.33.0", features = ["rt"] }
url = "2.4.1"

[profile.release]
lto = true
2 changes: 1 addition & 1 deletion jsonld/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ sophia_iri.workspace = true
sophia_term.workspace = true
thiserror.workspace = true
tokio.workspace = true
url = "2.4.1"
url.workspace = true

[dev-dependencies]
sophia_turtle.workspace = true
3 changes: 3 additions & 0 deletions sophia/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ sophia_rio.workspace = true
sophia_turtle.workspace = true
sophia_term.workspace = true
sophia_xml = { workspace = true, optional = true }

[dev-dependencies]
url.workspace = true
46 changes: 46 additions & 0 deletions sophia/examples/canonicalize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//! Read a Dataset serialized in [N-Quads] from the standard input,
//! and write back to the standard output its canonical form,
//! using the [RDFC-1.0] canonicalization algorithm.
//!
//! Parameters of the RDFC-1.0 can be provided via the following environment variables:
//! * SOPHIA_RDFC10_DEPTH_FACTOR
//! * SOPHIA_RDFC10_PERMUTATION_LIMIT
//!
//! [N-Quads]: https://www.w3.org/TR/n-quads/
//! [RDFC-1.0]: https://www.w3.org/TR/rdf-canon/
use std::env::{var, VarError::*};
use std::io::{stdin, stdout, BufReader, BufWriter};

use sophia::api::prelude::*;
use sophia::api::quad::Spog;
use sophia::api::term::SimpleTerm;
use sophia::c14n::rdfc10;
use sophia::turtle::parser::nq;
use sophia_c14n::hash::Sha256;
use sophia_c14n::rdfc10::{DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT};

/// Read an N-Quads dataset from the standard input and write its RDFC-1.0
/// canonical form to the standard output.
///
/// The depth factor and the permutation limit are read from the environment
/// variables `SOPHIA_RDFC10_DEPTH_FACTOR` and `SOPHIA_RDFC10_PERMUTATION_LIMIT`,
/// falling back to the library defaults when a variable is not set.
///
/// # Errors
///
/// Returns an error if the input cannot be parsed, if one of the environment
/// variables holds an invalid value, if canonicalization fails, or if the
/// output cannot be completely written.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input = BufReader::new(stdin());
    let dataset: MyDataset = nq::parse_bufread(input).collect_quads()?;
    let mut output = BufWriter::new(stdout());
    let depth_factor = env_param("SOPHIA_RDFC10_DEPTH_FACTOR", "f32", DEFAULT_DEPTH_FACTOR)?;
    let permutation_limit = env_param(
        "SOPHIA_RDFC10_PERMUTATION_LIMIT",
        "usize",
        DEFAULT_PERMUTATION_LIMIT,
    )?;
    // TODO make it possible to select another hash function
    rdfc10::normalize_with::<Sha256, _, _>(
        &dataset,
        &mut output,
        depth_factor,
        permutation_limit,
    )?;
    // Flush explicitly: the implicit flush performed when a `BufWriter` is
    // dropped ignores write errors.
    std::io::Write::flush(&mut output)?;
    Ok(())
}

/// Read the environment variable `name` and parse it as a `T`.
///
/// Returns `default` when the variable is not set. `type_name` is only used
/// to build the error message when the value can not be parsed.
fn env_param<T>(name: &str, type_name: &str, default: T) -> Result<T, Box<dyn std::error::Error>>
where
    T: std::str::FromStr,
    T::Err: std::fmt::Display,
{
    match var(name) {
        Ok(txt) => txt.parse().map_err(|e| {
            Box::<dyn std::error::Error>::from(format!(
                "{} is not a valid {}: {}",
                name, type_name, e
            ))
        }),
        Err(NotPresent) => Ok(default),
        Err(other) => Err(other.into()),
    }
}

/// Dataset type the parsed input is collected into:
/// a `HashSet` of `Spog` quads whose terms are `SimpleTerm<'static>`.
type MyDataset = std::collections::HashSet<Spog<SimpleTerm<'static>>>;
150 changes: 150 additions & 0 deletions sophia/examples/parse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
//! Parse a graph or a dataset from the standard input,
//! in the format specified in the first argument,
//! and write it back in [N-Triples]/[N-Quads] to the standard output.
//!
//! Alternatively, the input file name can be provided as a second argument,
//! which will also set the base IRI to the corresponding file: URL.
//!
//! The base IRI can be overridden via the environment variable SOPHIA_BASE.
//!
//! Recognized formats are:
//! - [`ntriples`](https://www.w3.org/TR/n-triples/) (alias `nt`)
//! - [`turtle`](https://www.w3.org/TR/turtle/) (alias `ttl`)
//! - [`nquads`](https://www.w3.org/TR/n-quads/) (alias `nq`)
//! - [`trig`](https://www.w3.org/TR/trig/)
//! - `gnq` (Generalized [N-Quads](https://www.w3.org/TR/n-quads/))
//! - `gtrig` (Generalized [TriG](https://www.w3.org/TR/trig/), default)
//! - [`jsonld`](https://www.w3.org/TR/json-ld11) (if compiled with the `jsonld` feature, alias `json-ld`)
//! - [`rdfxml`](https://www.w3.org/TR/rdf-syntax-grammar) (if compiled with the `xml` feature, alias `rdf`)
//!
//! [N-Triples]: https://www.w3.org/TR/n-triples/
//! [N-Quads]: https://www.w3.org/TR/n-quads/
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdin};

use sophia::api::prelude::*;
use sophia::api::source::StreamError::{SinkError, SourceError};
#[cfg(feature = "jsonld")]
use sophia::jsonld::{JsonLdOptions, JsonLdParser};
use sophia::turtle::parser::{
gnq::GNQuadsParser, gtrig::GTriGParser, nq::NQuadsParser, nt::NTriplesParser, trig::TriGParser,
turtle::TurtleParser,
};
use sophia::turtle::serializer::{nq::NqSerializer, nt::NtSerializer};
#[cfg(feature = "xml")]
use sophia::xml::parser::RdfXmlParser;

/// Parse the input in the format named by the first command-line argument
/// (`gtrig` when absent) and dump it to the standard output.
///
/// The optional second argument is the path of the input file; without it,
/// the standard input is read. The base IRI is taken from `SOPHIA_BASE` if
/// set, else derived from the input path, else a fixed `x-stdin:` IRI.
///
/// Exits with status -1 on an unrecognized format, and with status 1 when
/// parsing or writing fails.
fn main() {
    // Positional arguments, program name excluded.
    let mut cli_args = std::env::args().skip(1);
    let format = cli_args.next().unwrap_or_else(|| "gtrig".to_string());
    let path = cli_args.next();

    let base_iri = match (std::env::var_os("SOPHIA_BASE"), path.as_ref()) {
        // An explicit base always wins.
        (Some(raw), _) => {
            let text = raw
                .into_string()
                .expect("Invalid UTF-8 data in SOPHIA_BASE");
            Iri::new(text).expect("Invalid IRI in SOPHIA_BASE")
        }
        // Otherwise use the file: URL of the input file, resolved against
        // the current directory.
        (None, Some(file)) => {
            let cwd = std::env::current_dir().expect("No current directory");
            let url = url::Url::from_file_path(cwd.join(file)).expect("Invalid path");
            Iri::new(url.into()).expect("Invalid file: IRI")
        }
        (None, None) => Iri::new_unchecked("x-stdin://localhost/".into()),
    };
    let base = Some(base_iri);

    let input = Input::new(path);
    let outcome = match format.as_str() {
        "ntriples" | "nt" => dump_triples(input, NTriplesParser {}),
        "turtle" | "ttl" => dump_triples(input, TurtleParser { base }),
        "nquads" | "nq" => dump_quads(input, NQuadsParser {}),
        "trig" => dump_quads(input, TriGParser { base }),
        "gnq" => dump_quads(input, GNQuadsParser {}),
        "gtrig" => dump_quads(input, GTriGParser { base }),
        #[cfg(feature = "jsonld")]
        "json-ld" | "jsonld" => {
            let options = JsonLdOptions::new()
                .with_base(base.clone().unwrap().map_unchecked(std::sync::Arc::from));
            let loader: sophia::jsonld::loader::FileUrlLoader = Default::default();
            // With the HTTP client enabled, fall back on HTTP for documents
            // that the file loader does not handle.
            #[cfg(feature = "http_client")]
            let loader = sophia::jsonld::loader::ChainLoader::new(
                loader,
                sophia::jsonld::loader::HttpLoader::default(),
            );
            let options = options.with_document_loader(loader);
            dump_quads(input, JsonLdParser::new_with_options(options))
        }
        #[cfg(feature = "xml")]
        "rdfxml" | "rdf" => dump_triples(input, RdfXmlParser { base }),
        _ => {
            eprintln!("Unrecognized format: {}", format);
            std::process::exit(-1);
        }
    };

    if let Err(message) = outcome {
        eprintln!("{}", message);
        std::process::exit(1);
    }
}

fn dump_triples<P: TripleParser<Input>>(input: Input, p: P) -> Result<(), String> {
let triple_source = p.parse(input);

let output = BufWriter::new(stdout());
let mut ser = NtSerializer::new(output);
match ser.serialize_triples(triple_source) {
Ok(_) => Ok(()),
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)),
Err(SinkError(e)) => Err(format!("Error while writing quads: {}", e)),
}
}

fn dump_quads<P: QuadParser<Input>>(input: Input, p: P) -> Result<(), String> {
let quad_source = p.parse(input);

let output = BufWriter::new(stdout());
let mut ser = NqSerializer::new(output);
match ser.serialize_quads(quad_source) {
Ok(_) => Ok(()),
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)),
Err(SinkError(e)) => Err(format!("Error while writing quads: {}", e)),
}
}

/// The source the data is read from: either the standard input or a file,
/// both behind a buffered reader.
enum Input {
    /// Buffered standard input.
    Stdin(BufReader<Stdin>),
    /// Buffered file.
    File(BufReader<File>),
}

impl Input {
    /// Build the input for the optional file name `path`:
    /// the standard input when `path` is `None`, else the named file.
    ///
    /// # Panics
    ///
    /// Panics if the file can not be opened.
    fn new(path: Option<String>) -> Self {
        path.map_or_else(
            || Self::Stdin(BufReader::new(stdin())),
            |name| Self::File(BufReader::new(File::open(name).expect("Can not open file"))),
        )
    }
}

/// Reading is delegated to the wrapped buffered reader.
impl Read for Input {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        match self {
            Self::Stdin(reader) => reader.read(buf),
            Self::File(reader) => reader.read(buf),
        }
    }
}

/// Buffer handling is delegated to the wrapped buffered reader.
impl BufRead for Input {
    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
        match self {
            Self::Stdin(reader) => reader.fill_buf(),
            Self::File(reader) => reader.fill_buf(),
        }
    }

    fn consume(&mut self, amt: usize) {
        match self {
            Self::Stdin(reader) => reader.consume(amt),
            Self::File(reader) => reader.consume(amt),
        }
    }
}
95 changes: 95 additions & 0 deletions sophia/examples/serialize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//! Read a graph or a dataset from the standard input in [N-Triples]/[N-Quads],
//! and serialize it back to the format specified in the first argument.
//!
//! Recognized formats are:
//! - [`ntriples`](https://www.w3.org/TR/n-triples/) (alias `nt`)
//! - [`turtle`](https://www.w3.org/TR/turtle/) (alias `ttl`)
//! - [`nquads`](https://www.w3.org/TR/n-quads/) (alias `nq`)
//! - [`trig`](https://www.w3.org/TR/trig/)
//! - [`jsonld`](https://www.w3.org/TR/json-ld11) (if compiled with the `jsonld` feature, alias `json-ld`)
//! - [`rdfxml`](https://www.w3.org/TR/rdf-syntax-grammar) (if compiled with the `xml` feature, alias `rdf`)
//!
//! NB: if the input is a dataset with named graphs,
//! and the output format is a graph format,
//! then only the default graph is serialized.
//!
//! [N-Triples]: https://www.w3.org/TR/n-triples/
//! [N-Quads]: https://www.w3.org/TR/n-quads/
use std::io::{stdin, stdout, BufReader, BufWriter};

use sophia::api::prelude::*;
use sophia::api::source::StreamError::{SinkError, SourceError};
#[cfg(feature = "jsonld")]
use sophia::jsonld::{serializer::JsonLdSerializer, JsonLdOptions};
use sophia::turtle::parser::gnq;
use sophia::turtle::serializer::{
nq::NqSerializer,
nt::NtSerializer,
trig::{TrigConfig, TrigSerializer},
turtle::{TurtleConfig, TurtleSerializer},
};
#[cfg(feature = "xml")]
use sophia::xml::serializer::RdfXmlSerializer;

/// Read quads from the standard input with the `gnq` parser and serialize
/// them to the standard output in the format named by the first command-line
/// argument (`trig` when absent).
///
/// Exits with status -1 on an unrecognized format, and with status 1 when
/// parsing or serializing fails.
fn main() {
    let quad_source = gnq::parse_bufread(BufReader::new(stdin()));
    let out = BufWriter::new(stdout());

    let format = std::env::args()
        .nth(1)
        .unwrap_or_else(|| "trig".to_string());

    let outcome = match format.as_str() {
        "ntriples" | "nt" => serialize_triples(quad_source, NtSerializer::new(out)),
        "turtle" | "ttl" => serialize_triples(
            quad_source,
            TurtleSerializer::new_with_config(out, TurtleConfig::new().with_pretty(true)),
        ),
        "nquads" | "nq" => serialize_quads(quad_source, NqSerializer::new(out)),
        "trig" => serialize_quads(
            quad_source,
            TrigSerializer::new_with_config(out, TrigConfig::new().with_pretty(true)),
        ),
        #[cfg(feature = "jsonld")]
        "json-ld" | "jsonld" => {
            let options = JsonLdOptions::new().with_spaces(2);
            serialize_quads(quad_source, JsonLdSerializer::new_with_options(out, options))
        }
        #[cfg(feature = "xml")]
        "rdfxml" | "rdf" => serialize_triples(quad_source, RdfXmlSerializer::new(out)),
        _ => {
            eprintln!("Unrecognized format: {}", format);
            std::process::exit(-1);
        }
    };

    if let Err(message) = outcome {
        eprintln!("{}", message);
        std::process::exit(1);
    }
}

fn serialize_triples<Q: QuadSource, S: TripleSerializer>(
quad_source: Q,
mut ser: S,
) -> Result<(), String> {
let triple_source = quad_source.filter_quads(|q| q.g().is_none()).to_triples();
match ser.serialize_triples(triple_source) {
Ok(_) => Ok(()),
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)),
Err(SinkError(e)) => Err(format!("Error while serializing triples: {}", e)),
}
}

fn serialize_quads<Q: QuadSource, S: QuadSerializer>(
quad_source: Q,
mut ser: S,
) -> Result<(), String> {
match ser.serialize_quads(quad_source) {
Ok(_) => Ok(()),
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)),
Err(SinkError(e)) => Err(format!("Error while serializing quads: {}", e)),
}
}

0 comments on commit 5433b9f

Please sign in to comment.