-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of github.com:mseri/doi2bib into main
- Loading branch information
Showing
7 changed files
with
254 additions
and
185 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
(executable | ||
(name doi2bib) | ||
(public_name doi2bib) | ||
(libraries astring cmdliner cohttp-lwt-unix ezxmlm) | ||
(libraries astring cmdliner cohttp-lwt-unix decompress.gz ezxmlm) | ||
(preprocess future_syntax)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
(* Mostly from deflate.gz documentation :) *) | ||
|
||
let uncompress_string str = | ||
let i = De.bigstring_create De.io_buffer_size in | ||
let o = De.bigstring_create De.io_buffer_size in | ||
let r = Buffer.create 0x1000 in | ||
let p = ref 0 in | ||
let refill buf = | ||
let len = min (String.length str - !p) De.io_buffer_size in | ||
Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; | ||
p := !p + len; | ||
len | ||
in | ||
let flush buf len = | ||
let str = Bigstringaf.substring buf ~off:0 ~len in | ||
Buffer.add_string r str | ||
in | ||
Gz.Higher.uncompress ~refill ~flush i o | ||
|> Result.map (fun _metadata -> Buffer.contents r) | ||
|
||
|
||
let time () = Int32.of_float (Unix.gettimeofday ()) | ||
|
||
let compress_string ?(level = 4) str = | ||
let i = De.bigstring_create De.io_buffer_size in | ||
let o = De.bigstring_create De.io_buffer_size in | ||
let w = De.Lz77.make_window ~bits:15 in | ||
let q = De.Queue.create 0x1000 in | ||
let r = Buffer.create 0x1000 in | ||
let p = ref 0 in | ||
let cfg = Gz.Higher.configuration Gz.Unix time in | ||
let refill buf = | ||
let len = min (String.length str - !p) De.io_buffer_size in | ||
Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; | ||
p := !p + len; | ||
len | ||
in | ||
let flush buf len = | ||
let str = Bigstringaf.substring buf ~off:0 ~len in | ||
Buffer.add_string r str | ||
in | ||
Gz.Higher.compress ~level ~w ~q ~refill ~flush () cfg i o; | ||
Buffer.contents r | ||
|
||
|
||
exception GzipError of string | ||
|
||
let extract is_gzipped body = | ||
if is_gzipped | ||
then ( | ||
match uncompress_string body with | ||
| Ok content -> content | ||
| Error (`Msg error) -> raise (GzipError error)) | ||
else body | ||
|
||
|
||
let gzip_h = | ||
let gzip_h = Cohttp.Header.of_list [ "accept-encoding", "gzip" ] in | ||
function | ||
| None -> Some gzip_h | ||
| Some h -> Some (Cohttp.Header.add_unless_exists h "accept-encoding" "gzip") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
exception Entry_not_found | ||
exception Bad_gateway | ||
|
||
let rec get ?headers ?fallback uri = | ||
let headers = Ezgz.gzip_h headers in | ||
let open Lwt.Syntax in | ||
let* resp, body = Cohttp_lwt_unix.Client.get ?headers uri in | ||
let status = Cohttp_lwt.Response.status resp in | ||
if status <> `OK then Lwt.ignore_result (Cohttp_lwt.Body.drain_body body); | ||
match status with | ||
| `OK -> | ||
let* body = Cohttp_lwt.Body.to_string body in | ||
let is_gzipped : bool = | ||
Cohttp_lwt.Response.headers resp | ||
|> fun resp -> Cohttp.Header.get resp "content-encoding" = Some "gzip" | ||
in | ||
let open Ezgz in | ||
(try Lwt.return @@ extract is_gzipped body with | ||
| GzipError error -> Lwt.fail @@ Failure error) | ||
| `Found -> | ||
let uri' = Cohttp_lwt.(resp |> Response.headers |> Cohttp.Header.get_location) in | ||
(match uri', fallback with | ||
| Some uri, _ -> get ?headers ?fallback uri | ||
| None, Some uri -> get ?headers uri | ||
| None, None -> | ||
Lwt.fail_with ("Malformed redirection trying to access '" ^ Uri.to_string uri ^ "'.")) | ||
| d when (d = `Not_found || d = `Gateway_timeout) && Option.is_some fallback -> | ||
(match fallback with | ||
| Some uri -> get ?headers uri | ||
| None -> assert false) | ||
| `Bad_request | `Not_found -> Lwt.fail Entry_not_found | ||
| `Bad_gateway -> Lwt.fail Bad_gateway | ||
| _ -> | ||
Lwt.fail_with | ||
("Response error: '" | ||
^ Cohttp.Code.string_of_status status | ||
^ "' trying to access '" | ||
^ Uri.to_string uri | ||
^ "'.") |
Oops, something went wrong.