-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
296 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
//! Read a Dataset serialized in [N-Quads] from the standard input,
//! and write back to the standard output its canonical form,
//! using the [RDFC-1.0] canonicalization algorithm.
//! | ||
//! Parameters of the RDFC-1.0 algorithm can be provided via the following environment variables:
//! * SOPHIA_RDFC10_DEPTH_FACTOR | ||
//! * SOPHIA_RDFC10_PERMUTATION_LIMIT | ||
//! | ||
//! [N-Quads]: https://www.w3.org/TR/n-quads/ | ||
//! [RDFC-1.0]: https://www.w3.org/TR/rdf-canon/ | ||
use std::env::{var, VarError::*}; | ||
use std::io::{stdin, stdout, BufReader, BufWriter}; | ||
|
||
use sophia::api::prelude::*; | ||
use sophia::api::quad::Spog; | ||
use sophia::api::term::SimpleTerm; | ||
use sophia::c14n::rdfc10; | ||
use sophia::turtle::parser::nq; | ||
use sophia_c14n::hash::Sha256; | ||
use sophia_c14n::rdfc10::{DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT}; | ||
|
||
fn main() -> Result<(), Box<dyn std::error::Error>> { | ||
let input = BufReader::new(stdin()); | ||
let dataset: MyDataset = nq::parse_bufread(input).collect_quads()?; | ||
let output = BufWriter::new(stdout()); | ||
let depth_factor = match var("SOPHIA_RDFC10_DEPTH_FACTOR") { | ||
Ok(txt) => txt | ||
.parse() | ||
.expect("SOPHIA_RDFC10_DEPTH_FACTOR is not a valid f32"), | ||
Err(NotPresent) => DEFAULT_DEPTH_FACTOR, | ||
Err(other) => return Err(other.into()), | ||
}; | ||
let permutation_limit = match var("SOPHIA_RDFC10_PERMUTATION_LIMIT") { | ||
Ok(txt) => txt | ||
.parse() | ||
.expect("SOPHIA_RDFC10_PERMUTATION_LIMIT is not a valid usize"), | ||
Err(NotPresent) => DEFAULT_PERMUTATION_LIMIT, | ||
Err(other) => return Err(other.into()), | ||
}; | ||
// TODO make it possible to select another hash function | ||
rdfc10::normalize_with::<Sha256, _, _>(&dataset, output, depth_factor, permutation_limit)?; | ||
Ok(()) | ||
} | ||
|
||
type MyDataset = std::collections::HashSet<Spog<SimpleTerm<'static>>>; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
//! Parse a graph or a dataset from the standard input, | ||
//! in the format specified in the first argument, | ||
//! and write it back in [N-Triples]/[N-Quads] to the standard output. | ||
//! | ||
//! Alternatively, the input file name can be provided as a second argument, | ||
//! which will also set the base IRI to the corresponding file: URL. | ||
//! | ||
//! The base IRI can be overridden via the environment variable SOPHIA_BASE. | ||
//! | ||
//! Recognized formats are: | ||
//! - [`ntriples`](https://www.w3.org/TR/n-triples/) (alias `nt`) | ||
//! - [`turtle`](https://www.w3.org/TR/turtle/) (alias `ttl`) | ||
//! - [`nquads`](https://www.w3.org/TR/n-quads/) (alias `nq`) | ||
//! - [`trig`](https://www.w3.org/TR/trig/) | ||
//! - `gnq` (Generalized [N-Quads](https://www.w3.org/TR/n-quads/)) | ||
//! - `gtrig` (Generalized [TriG](https://www.w3.org/TR/trig/), default) | ||
//! - [`jsonld`](https://www.w3.org/TR/json-ld11) (if compiled with the `jsonld` feature)
//! - [`rdfxml`](https://www.w3.org/TR/rdf-syntax-grammar) (if compiled with the `xml` feature, alias `rdf`)
//! | ||
//! [N-Triples]: https://www.w3.org/TR/n-triples/ | ||
//! [N-Quads]: https://www.w3.org/TR/n-quads/ | ||
use std::fs::File; | ||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdin}; | ||
|
||
use sophia::api::prelude::*; | ||
use sophia::api::source::StreamError::{SinkError, SourceError}; | ||
#[cfg(feature = "jsonld")] | ||
use sophia::jsonld::{JsonLdOptions, JsonLdParser}; | ||
use sophia::turtle::parser::{ | ||
gnq::GNQuadsParser, gtrig::GTriGParser, nq::NQuadsParser, nt::NTriplesParser, trig::TriGParser, | ||
turtle::TurtleParser, | ||
}; | ||
use sophia::turtle::serializer::{nq::NqSerializer, nt::NtSerializer}; | ||
#[cfg(feature = "xml")] | ||
use sophia::xml::parser::RdfXmlParser; | ||
|
||
fn main() { | ||
let format = std::env::args() | ||
.nth(1) | ||
.unwrap_or_else(|| "gtrig".to_string()); | ||
let path = std::env::args().nth(2); | ||
let base = Some(if let Some(iri) = std::env::var_os("SOPHIA_BASE") { | ||
let iri = iri | ||
.into_string() | ||
.expect("Invalid UTF-8 data in SOPHIA_BASE"); | ||
Iri::new(iri).expect("Invalid IRI in SOPHIA_BASE") | ||
} else if let Some(path) = &path { | ||
let cwd = std::env::current_dir().expect("No current directory"); | ||
let url = url::Url::from_file_path(cwd.join(path)).expect("Invalid path"); | ||
Iri::new(url.into()).expect("Invalid file: IRI") | ||
} else { | ||
Iri::new_unchecked("x-stdin://localhost/".into()) | ||
}); | ||
let input = Input::new(path); | ||
let res = match &format[..] { | ||
"ntriples" | "nt" => dump_triples(input, NTriplesParser {}), | ||
"turtle" | "ttl" => dump_triples(input, TurtleParser { base }), | ||
"nquads" | "nq" => dump_quads(input, NQuadsParser {}), | ||
"trig" => dump_quads(input, TriGParser { base }), | ||
"gnq" => dump_quads(input, GNQuadsParser {}), | ||
"gtrig" => dump_quads(input, GTriGParser { base }), | ||
#[cfg(feature = "jsonld")] | ||
"json-ld" | "jsonld" => { | ||
let options = JsonLdOptions::new() | ||
.with_base(base.clone().unwrap().map_unchecked(std::sync::Arc::from)); | ||
let loader: sophia::jsonld::loader::FileUrlLoader = Default::default(); | ||
#[cfg(feature = "http_client")] | ||
let loader = sophia::jsonld::loader::ChainLoader::new( | ||
loader, | ||
sophia::jsonld::loader::HttpLoader::default(), | ||
); | ||
let options = options.with_document_loader(loader); | ||
dump_quads(input, JsonLdParser::new_with_options(options)) | ||
} | ||
#[cfg(feature = "xml")] | ||
"rdfxml" | "rdf" => dump_triples(input, RdfXmlParser { base }), | ||
_ => { | ||
eprintln!("Unrecognized format: {}", format); | ||
std::process::exit(-1); | ||
} | ||
}; | ||
if let Err(msg) = res { | ||
eprintln!("{}", msg); | ||
std::process::exit(1); | ||
} | ||
} | ||
|
||
fn dump_triples<P: TripleParser<Input>>(input: Input, p: P) -> Result<(), String> { | ||
let triple_source = p.parse(input); | ||
|
||
let output = BufWriter::new(stdout()); | ||
let mut ser = NtSerializer::new(output); | ||
match ser.serialize_triples(triple_source) { | ||
Ok(_) => Ok(()), | ||
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)), | ||
Err(SinkError(e)) => Err(format!("Error while writing quads: {}", e)), | ||
} | ||
} | ||
|
||
fn dump_quads<P: QuadParser<Input>>(input: Input, p: P) -> Result<(), String> { | ||
let quad_source = p.parse(input); | ||
|
||
let output = BufWriter::new(stdout()); | ||
let mut ser = NqSerializer::new(output); | ||
match ser.serialize_quads(quad_source) { | ||
Ok(_) => Ok(()), | ||
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)), | ||
Err(SinkError(e)) => Err(format!("Error while writing quads: {}", e)), | ||
} | ||
} | ||
|
||
/// Source of the data to parse: either the standard input or a file,
/// each wrapped in a [`BufReader`].
enum Input {
    /// Buffered standard input.
    Stdin(BufReader<Stdin>),
    /// Buffered file opened from a path.
    File(BufReader<File>),
}

impl Input {
    /// Builds the input from an optional file path.
    ///
    /// `None` selects the standard input; `Some(path)` opens that file.
    ///
    /// # Panics
    /// Panics if the file can not be opened; the message names the offending
    /// path and the underlying I/O error.
    fn new(path: Option<String>) -> Self {
        match path {
            None => Self::Stdin(BufReader::new(stdin())),
            Some(path) => match File::open(&path) {
                Ok(file) => Self::File(BufReader::new(file)),
                // Name the path: "Can not open file" alone does not tell the
                // user which file was the problem.
                Err(err) => panic!("Can not open file {}: {}", path, err),
            },
        }
    }
}
|
||
impl Read for Input { | ||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { | ||
match self { | ||
Input::Stdin(b) => b.read(buf), | ||
Input::File(b) => b.read(buf), | ||
} | ||
} | ||
} | ||
|
||
impl BufRead for Input { | ||
fn fill_buf(&mut self) -> std::io::Result<&[u8]> { | ||
match self { | ||
Input::Stdin(b) => b.fill_buf(), | ||
Input::File(b) => b.fill_buf(), | ||
} | ||
} | ||
|
||
fn consume(&mut self, amt: usize) { | ||
match self { | ||
Input::Stdin(b) => b.consume(amt), | ||
Input::File(b) => b.consume(amt), | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
//! Read a graph or a dataset from the standard input in [N-Triples]/[N-Quads],
//! and serialize it back to the format specified in the first argument.
//! | ||
//! Recognized formats are: | ||
//! - [`ntriples`](https://www.w3.org/TR/n-triples/) (alias `nt`) | ||
//! - [`turtle`](https://www.w3.org/TR/turtle/) (alias `ttl`) | ||
//! - [`nquads`](https://www.w3.org/TR/n-quads/) (alias `nq`) | ||
//! - [`trig`](https://www.w3.org/TR/trig/) | ||
//! - [`jsonld`](https://www.w3.org/TR/json-ld11) (if compiled with the `jsonld` feature)
//! - [`rdfxml`](https://www.w3.org/TR/rdf-syntax-grammar) (if compiled with the `xml` feature, alias `rdf`)
//! | ||
//! NB: if the input is a dataset with named graphs,
//! and the output format is a graph format,
//! then only the default graph is serialized.
//! | ||
//! [N-Triples]: https://www.w3.org/TR/n-triples/ | ||
//! [N-Quads]: https://www.w3.org/TR/n-quads/ | ||
use std::io::{stdin, stdout, BufReader, BufWriter}; | ||
|
||
use sophia::api::prelude::*; | ||
use sophia::api::source::StreamError::{SinkError, SourceError}; | ||
#[cfg(feature = "jsonld")] | ||
use sophia::jsonld::{serializer::JsonLdSerializer, JsonLdOptions}; | ||
use sophia::turtle::parser::gnq; | ||
use sophia::turtle::serializer::{ | ||
nq::NqSerializer, | ||
nt::NtSerializer, | ||
trig::{TrigConfig, TrigSerializer}, | ||
turtle::{TurtleConfig, TurtleSerializer}, | ||
}; | ||
#[cfg(feature = "xml")] | ||
use sophia::xml::serializer::RdfXmlSerializer; | ||
|
||
fn main() { | ||
let input = BufReader::new(stdin()); | ||
let quad_source = gnq::parse_bufread(input); | ||
let out = BufWriter::new(stdout()); | ||
|
||
let format = std::env::args() | ||
.nth(1) | ||
.unwrap_or_else(|| "trig".to_string()); | ||
let res = match &format[..] { | ||
"ntriples" | "nt" => serialize_triples(quad_source, NtSerializer::new(out)), | ||
"turtle" | "ttl" => { | ||
let config = TurtleConfig::new().with_pretty(true); | ||
let ser = TurtleSerializer::new_with_config(out, config); | ||
serialize_triples(quad_source, ser) | ||
} | ||
"nquads" | "nq" => serialize_quads(quad_source, NqSerializer::new(out)), | ||
"trig" => { | ||
let config = TrigConfig::new().with_pretty(true); | ||
let ser = TrigSerializer::new_with_config(out, config); | ||
serialize_quads(quad_source, ser) | ||
} | ||
#[cfg(feature = "jsonld")] | ||
"json-ld" | "jsonld" => serialize_quads( | ||
quad_source, | ||
JsonLdSerializer::new_with_options(out, JsonLdOptions::new().with_spaces(2)), | ||
), | ||
#[cfg(feature = "xml")] | ||
"rdfxml" | "rdf" => serialize_triples(quad_source, RdfXmlSerializer::new(out)), | ||
_ => { | ||
eprintln!("Unrecognized format: {}", format); | ||
std::process::exit(-1); | ||
} | ||
}; | ||
if let Err(msg) = res { | ||
eprintln!("{}", msg); | ||
std::process::exit(1); | ||
} | ||
} | ||
|
||
fn serialize_triples<Q: QuadSource, S: TripleSerializer>( | ||
quad_source: Q, | ||
mut ser: S, | ||
) -> Result<(), String> { | ||
let triple_source = quad_source.filter_quads(|q| q.g().is_none()).to_triples(); | ||
match ser.serialize_triples(triple_source) { | ||
Ok(_) => Ok(()), | ||
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)), | ||
Err(SinkError(e)) => Err(format!("Error while serializing triples: {}", e)), | ||
} | ||
} | ||
|
||
fn serialize_quads<Q: QuadSource, S: QuadSerializer>( | ||
quad_source: Q, | ||
mut ser: S, | ||
) -> Result<(), String> { | ||
match ser.serialize_quads(quad_source) { | ||
Ok(_) => Ok(()), | ||
Err(SourceError(e)) => Err(format!("Error while parsing input: {}", e)), | ||
Err(SinkError(e)) => Err(format!("Error while serializing quads: {}", e)), | ||
} | ||
} |