Skip to content

Commit

Permalink
Don't rely on reqwest::blocking::get for HTTP requests
Browse files Browse the repository at this point in the history
  • Loading branch information
mmarx committed Jul 6, 2023
1 parent 797cb22 commit 6ef2aea
Show file tree
Hide file tree
Showing 16 changed files with 229 additions and 179 deletions.
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nemo-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use colored::Colorize;
use nemo::{
error::{Error, ReadingError},
execution::{DefaultExecutionEngine, ExecutionEngine},
io::{input_manager::ResourceProviders, parser::parse_program, RecordWriter},
io::{parser::parse_program, resource_providers::ResourceProviders, RecordWriter},
meta::{timing::TimedDisplay, TimedCode},
model::OutputPredicateSelection,
};
Expand Down
1 change: 1 addition & 0 deletions nemo-physical/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ linked-hash-map = "0.5.6"
howlong = { version = "0.1", optional = true }
rio_turtle = "0.8.4"
rio_xml = "0.8.4"
reqwest = "0.11.18"

[dev-dependencies]
arbitrary = { version = "1", features = ["derive"] }
Expand Down
3 changes: 3 additions & 0 deletions nemo-physical/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ pub enum ReadingError {
/// Error in Rio's RDF/XML parser
#[error(transparent)]
RioXML(#[from] rio_xml::RdfXmlError),
/// Error in reqwest's HTTP handler
#[error(transparent)]
HTTPTransfer(#[from] reqwest::Error),
}

/// Error-Collection for all the possible Errors occurring in this crate
Expand Down
2 changes: 1 addition & 1 deletion nemo-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{collections::HashSet, fs::read_to_string};
use nemo::{
datatypes::{DataValueT, Double, Float},
execution::ExecutionEngine,
io::{input_manager::ResourceProviders, OutputFileManager, RecordWriter},
io::{resource_providers::ResourceProviders, OutputFileManager, RecordWriter},
};

use pyo3::{create_exception, prelude::*};
Expand Down
3 changes: 1 addition & 2 deletions nemo-wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ use js_sys::Set;
use js_sys::Uint8Array;
use nemo::execution::ExecutionEngine;

use nemo::io::input_manager::ResourceProvider;
use nemo::io::input_manager::ResourceProviders;
use nemo::io::parser::parse_program;
use nemo::io::resource_providers::{ResourceProvider, ResourceProviders};
use nemo_physical::datatypes::DataValueT;
use nemo_physical::table_reader::Resource;
use wasm_bindgen::prelude::wasm_bindgen;
Expand Down
11 changes: 4 additions & 7 deletions nemo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,16 @@ readme = "README.md"
repository.workspace = true

[features]
# TODO: Add "http-resource-provider"
default = ["timing"]
# Allows building for web assembly environments
# Enables the "js" feature of the "getrandom" crate
# This feature cannot be used together with the "timing" feature, because the "howlong" crate does not support web assembly environments
# This feature cannot be used together with the "http-resource-provider" feature, because the "reqwest" crate does not support web assembly environments with the "blocking" feature
js = ["getrandom/js"]
http-resource-provider = ["dep:reqwest"]
no-prefixed-string-dictionary = ["nemo-physical/no-prefixed-string-dictionary"]
timing = ["nemo-physical/timing"]

[dependencies]
nemo-physical = { path = "../nemo-physical", default-features = false }
log = "0.4"
nom = "7.1.1"
macros = { path = "../libs/macros" }
Expand All @@ -32,15 +30,14 @@ thiserror = "1.0"
flate2 = "1"
sanitise-file-name = "1.0.0"
nom_locate = { version = "4.1.0", features = [ "runtime-dispatch-simd" ] }
getrandom = {version = "0.2.9", default-features = false}
reqwest = { version = "0.11.18", features = ["blocking"], optional = true }

nemo-physical = { path = "../nemo-physical", default-features = false }
getrandom = { version = "0.2.9", default-features = false }
path-slash = "0.2.1"
rio_api = "0.8.4"
rio_turtle = "0.8.4"
rio_xml = "0.8.4"
oxiri = "0.2.2"
tokio = { version = "1.29.1", features = [ "rt" ] }
reqwest = { version = "0.11.18" }

[dev-dependencies]
env_logger = "*"
Expand Down
2 changes: 1 addition & 1 deletion nemo/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ use crate::{
error::{Error, ReadingError},
execution::{DefaultExecutionEngine, ExecutionEngine},
io::{
input_manager::ResourceProviders,
parser::{all_input_consumed, RuleParser},
resource_providers::ResourceProviders,
OutputFileManager, RecordWriter,
},
model::Identifier,
Expand Down
2 changes: 1 addition & 1 deletion nemo/src/execution/execution_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use nemo_physical::{

use crate::{
error::Error,
io::input_manager::{InputManager, ResourceProviders},
io::{input_manager::InputManager, resource_providers::ResourceProviders},
model::{chase_model::ChaseProgram, Identifier, Program, TermOperation},
program_analysis::analysis::ProgramAnalysis,
table_manager::TableManager,
Expand Down
2 changes: 2 additions & 0 deletions nemo/src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ pub mod input_manager;
pub mod output_file_manager;
pub mod parser;
pub mod rdf_triples;
pub mod resource_providers;

pub use input_manager::InputManager;
pub use output_file_manager::OutputFileManager;

use nemo_physical::dictionary::value_serializer::TrieSerializer;
Expand Down
6 changes: 3 additions & 3 deletions nemo/src/io/dsv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
//! ## Logical layer
//! ```
//! # use nemo_physical::table_reader::TableReader;
//! # use nemo::{types::LogicalTypeEnum, io::{input_manager::ResourceProviders, dsv::DSVReader}};
//! # use nemo::{types::LogicalTypeEnum, io::{resource_providers::ResourceProviders, dsv::DSVReader}};
//! # let file_path = String::from("../resources/doc/examples/city_population.csv");
//! let csv_reader = DSVReader::csv(
//! ResourceProviders::default(),
Expand All @@ -38,7 +38,7 @@
//! ```
//! # use nemo_physical::table_reader::TableReader;
//! #
//! # use nemo::{types::LogicalTypeEnum, io::{input_manager::ResourceProviders, dsv::DSVReader}};
//! # use nemo::{types::LogicalTypeEnum, io::{resource_providers::ResourceProviders, dsv::DSVReader}};
//! # use std::cell::RefCell;
//! # use nemo_physical::builder_proxy::{
//! # PhysicalBuilderProxyEnum, PhysicalColumnBuilderProxy, PhysicalStringColumnBuilderProxy
Expand Down Expand Up @@ -80,7 +80,7 @@ use crate::builder_proxy::LogicalColumnBuilderProxy;
use crate::error::{Error, ReadingError};
use crate::types::LogicalTypeEnum;

use super::input_manager::ResourceProviders;
use super::resource_providers::ResourceProviders;

/// A reader object for reading [DSV](https://en.wikipedia.org/wiki/Delimiter-separated_values) (delimiter separated values) files.
///
Expand Down
169 changes: 8 additions & 161 deletions nemo/src/io/input_manager.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
//! Management of resource providers, handling of decompression and resolution of resources to readers.
use std::{io::Read, path::PathBuf, rc::Rc};
use nemo_physical::management::database::TableSource;

use std::fs::File;

use flate2::read::GzDecoder;
use nemo_physical::{
error::ReadingError, management::database::TableSource, table_reader::Resource,
};
use path_slash::PathBufExt;

use crate::{error::Error, model::DataSource, types::LogicalTypeEnum};

use super::{
dsv::DSVReader,
parser::{all_input_consumed, iri::iri},
rdf_triples::RDFTriplesReader,
use crate::{
error::Error,
io::{dsv::DSVReader, rdf_triples::RDFTriplesReader, resource_providers::ResourceProviders},
model::DataSource,
types::LogicalTypeEnum,
};

/// Manages everything related to resolving the inputs of a Nemo program.
Expand All @@ -25,153 +16,9 @@ pub struct InputManager {
resource_providers: ResourceProviders,
}

/// A list of [`ResourceProvider`] sorted by decreasing priority.
///
/// This allows resolving a given resource, which may occur in a Nemo program,
/// to a reader (which returns the actual bytes of e.g. a referenced file).
///
/// The list of [`ResourceProviders`] can be customized by users of the Rust nemo crate.
#[derive(Debug, Clone)]
pub struct ResourceProviders(Rc<Vec<Box<dyn ResourceProvider>>>);

impl ResourceProviders {
    /// Builds the collection from an explicit list of [`ResourceProvider`]s.
    ///
    /// Providers are consulted in the order in which they appear in `r`.
    pub fn from(r: Vec<Box<dyn ResourceProvider>>) -> Self {
        let shared = Rc::new(r);
        Self(shared)
    }

    /// Builds a collection without any provider; it cannot resolve any resource.
    pub fn empty() -> Self {
        Self(Rc::new(Vec::new()))
    }

    /// Resolves `resource` to a reader by asking each provider in turn.
    ///
    /// When `try_gzip` is `false`, the reader of the first provider that handles
    /// the resource is returned as is. When it is `true`, that reader is first
    /// wrapped in a gzip decoder; if no gzip header is found, the same provider
    /// is asked for a fresh reader, which is returned undecoded.
    ///
    /// # Errors
    ///
    /// Propagates any error raised by a provider, and returns
    /// [`ReadingError::ResourceNotProvided`] if no provider yields a reader.
    pub fn open_resource(
        &self,
        resource: &Resource,
        try_gzip: bool,
    ) -> Result<Box<dyn Read>, ReadingError> {
        for provider in self.0.iter() {
            let first_reader = match provider.open_resource(resource)? {
                Some(opened) => opened,
                // This provider does not handle the resource, ask the next one.
                None => continue,
            };

            if !try_gzip {
                return Ok(first_reader);
            }

            // The first reader is moved into the decoder to probe for a gzip header.
            let decoder = GzDecoder::new(first_reader);
            if decoder.header().is_some() {
                return Ok(Box::new(decoder));
            }

            // No gzip header: request a new reader from the same provider,
            // or fall through to the next provider if it declines this time.
            match provider.open_resource(resource)? {
                Some(plain_reader) => return Ok(plain_reader),
                None => continue,
            }
        }

        Err(ReadingError::ResourceNotProvided {
            resource: resource.clone(),
        })
    }
}

impl Default for ResourceProviders {
fn default() -> Self {
Self(Rc::new(vec![
Box::<HTTPResourceProvider>::default(),
Box::<FileResourceProvider>::default(),
]))
}
}

/// Allows resolving resources to readers.
///
/// This allows specifying how to resolve a resource independent of how the file format is going to be parsed.
pub trait ResourceProvider: std::fmt::Debug {
    /// Resolve and open a resource.
    ///
    /// This function may be called multiple times in a row, e.g. when testing if a file can be opened using gzip.
    ///
    /// The implementation can decide whether it wants to handle the given resource; otherwise it can return `None`, and the next `ResourceProvider` will be consulted.
    fn open_resource(&self, resource: &Resource) -> Result<Option<Box<dyn Read>>, ReadingError>;
}

/// Returns `true` if the `iri` parser accepts `resource` and consumes all of its input.
fn is_iri(resource: &Resource) -> bool {
    matches!(all_input_consumed(iri)(resource), Ok(_))
}

/// Resource provider backed by the file system of the operating system.
///
/// It is responsible for `file:` IRIs and for anything that is not an IRI,
/// which is treated as a (possibly relative) file path.
#[derive(Debug, Clone, Copy, Default)]
pub struct FileResourceProvider {}

impl ResourceProvider for FileResourceProvider {
    /// Opens the file that `resource` refers to.
    ///
    /// Returns `Ok(None)` for IRIs that do not start with `file://`, so that
    /// another provider can handle them.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened.
    fn open_resource(&self, resource: &Resource) -> Result<Option<Box<dyn Read>>, ReadingError> {
        let location = if !is_iri(resource) {
            // Not a valid IRI: take the resource verbatim as a path.
            PathBuf::from(resource)
        } else if !resource.starts_with("file://") {
            // An IRI with a different scheme is none of our business.
            return Ok(None);
        } else {
            // File IRI. Only local files are supported, i.e., the host part
            // must be empty or `localhost`.
            let without_scheme = match resource.strip_prefix("file://localhost") {
                Some(rest) => Some(rest),
                None => resource.strip_prefix("file://"),
            };

            match without_scheme {
                Some(local_path) => PathBuf::from_slash(local_path),
                None => return Err(ReadingError::InvalidFileUri(resource.to_string())),
            }
        };

        let handle = File::open(location)?;
        Ok(Some(Box::new(handle)))
    }
}

/// Resolves resources using HTTP or HTTPS.
///
/// Handles `http:` and `https:` IRIs.
#[derive(Debug, Clone, Copy, Default)]
pub struct HTTPResourceProvider {}

impl ResourceProvider for HTTPResourceProvider {
    /// Fetches `resource` with a blocking HTTP GET request and returns the
    /// response as a reader.
    ///
    /// Returns `Ok(None)` if `resource` is not an IRI, or is an IRI that does
    /// not start with `http:` or `https:`, so that another provider can handle it.
    ///
    /// # Errors
    ///
    /// A failed request is reported as [`ReadingError::HTTPTransfer`] instead
    /// of aborting the program.
    ///
    /// # Panics
    ///
    /// Panics when asked to resolve an HTTP(S) IRI while the crate was built
    /// without the `http-resource-provider` feature.
    fn open_resource(&self, resource: &Resource) -> Result<Option<Box<dyn Read>>, ReadingError> {
        if !is_iri(resource) {
            return Ok(None);
        }

        if !(resource.starts_with("http:") || resource.starts_with("https:")) {
            // Non-http IRI, resource provider is not responsible
            return Ok(None);
        }

        #[cfg(not(feature = "http-resource-provider"))]
        {
            panic!("Using the HTTPResourceProvider requires the http-resource-provider feature")
        }
        #[cfg(feature = "http-resource-provider")]
        {
            // Network failures are ordinary runtime errors: hand them to the
            // caller via `ReadingError::HTTPTransfer` rather than unwrapping.
            let response = reqwest::blocking::get(resource)?;
            Ok(Some(Box::new(response)))
        }
    }
}

// Reject builds that enable both the "js" and the "http-resource-provider" feature.
// See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features
#[cfg(all(feature = "js", feature = "http-resource-provider"))]
compile_error!("feature \"js\" and feature \"http-resource-provider\" cannot be enabled at the same time, because the \"reqwest\" crate does not support web assembly environments with the \"blocking\" feature");

impl InputManager {
#[allow(missing_docs)]
/// Create a new [input manager][InputManager] from the given
/// [resource providers][ResourceProviders].
pub fn new(resource_providers: ResourceProviders) -> Self {
Self { resource_providers }
}
Expand Down
2 changes: 1 addition & 1 deletion nemo/src/io/rdf_triples.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use rio_xml::RdfXmlParser;

use crate::types::LogicalTypeEnum;

use super::input_manager::ResourceProviders;
use super::resource_providers::ResourceProviders;

/// A [`TableReader`] for RDF 1.1 files containing triples.
#[derive(Debug, Clone)]
Expand Down
Loading

0 comments on commit 6ef2aea

Please sign in to comment.