From 38eea26321b787d8942d8387377ff74c5ac316c3 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Fri, 17 Dec 2021 10:45:13 +0100 Subject: [PATCH] cuz -> clz, doi -> crossref + ocamlformat Signed-off-by: Marcello Seri --- .ocamlformat | 3 - CHANGES.md | 9 +++ README.md | 1 - bin/doi2bib.ml | 67 +++++++++++---------- doi2bib.opam | 5 +- dune | 1 - dune-project | 4 +- lib/dune | 2 +- lib/http.ml | 97 ++++++++++++++----------------- lib/parser.ml | 48 +++++++-------- tests/arxiv.t/run.t | 58 +++++++++--------- tests/doi.t/run.t | 44 +++++++------- tests/pubmed.t/run.t | 23 ++++---- vendor/cuz/.ocamlformat | 3 - vendor/cuz/CHANGES.md | 8 --- vendor/cuz/LICENSE | 21 ------- vendor/cuz/README.md | 3 - vendor/cuz/cuz.opam | 30 ---------- vendor/cuz/dune-project | 28 --------- vendor/cuz/src/cuz.ml | 12 ---- vendor/cuz/src/cuz.mli | 33 ----------- vendor/cuz/src/cuz_cohttp.ml | 45 -------------- vendor/cuz/src/cuz_cohttp.mli | 13 ----- vendor/cuz/src/cuz_decompress.ml | 90 ---------------------------- vendor/cuz/src/cuz_decompress.mli | 4 -- vendor/cuz/src/cuz_unix.ml | 3 - vendor/cuz/src/cuz_unix.mli | 1 - vendor/cuz/src/dune | 20 ------- 28 files changed, 173 insertions(+), 503 deletions(-) delete mode 100644 dune delete mode 100644 vendor/cuz/.ocamlformat delete mode 100644 vendor/cuz/CHANGES.md delete mode 100644 vendor/cuz/LICENSE delete mode 100644 vendor/cuz/README.md delete mode 100644 vendor/cuz/cuz.opam delete mode 100644 vendor/cuz/dune-project delete mode 100644 vendor/cuz/src/cuz.ml delete mode 100644 vendor/cuz/src/cuz.mli delete mode 100644 vendor/cuz/src/cuz_cohttp.ml delete mode 100644 vendor/cuz/src/cuz_cohttp.mli delete mode 100644 vendor/cuz/src/cuz_decompress.ml delete mode 100644 vendor/cuz/src/cuz_decompress.mli delete mode 100644 vendor/cuz/src/cuz_unix.ml delete mode 100644 vendor/cuz/src/cuz_unix.mli delete mode 100644 vendor/cuz/src/dune diff --git a/.ocamlformat b/.ocamlformat index 6f93117..e69de29 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -1,3 +0,0 @@ -profile=janestreet -wrap-comments=false -let-binding-spacing=sparse diff --git a/CHANGES.md b/CHANGES.md index f188ec1..cf339b3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +# 0.5.2 (2021-12-17) + +- Move from cuz to the published clz +- Move from dx.doi.org to crossref rest api service, + the latter gives better and more consistent results and + does not seem to require a fallback service any longer +- Update arxiv generated bibtex accordingly +- Update ocamlformat + # 0.5.1 (2021-07-01) - Fix for transitive dependency in cuz diff --git a/README.md b/README.md index 3076062..99b8b59 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,6 @@ Examples of use (the bibtex entry is printed on standard output): Each release comes with attached binaries for windows, mac and linux. If you want to build the package yourself, the most immediate way is by running - $ opam pin add doi2bib https://github.com/mseri/doi2bib.git $ opam install doi2bib To run the tests, clone this repository and from of the root of the project run diff --git a/bin/doi2bib.ml b/bin/doi2bib.ml index 23eb8bc..8409980 100644 --- a/bin/doi2bib.ml +++ b/bin/doi2bib.ml @@ -5,39 +5,39 @@ let err s = `Error (false, s) let doi2bib id = match id with | None -> `Help (`Pager, None) - | Some id -> - (match Lwt_main.run (Http.get_bib_entry @@ Parser.parse_id id) with - | bibtex -> `Ok (Printf.printf "%s" bibtex) - | exception Http.PubMed_DOI_not_found -> - err @@ Printf.sprintf "Error: unable to find a DOI entry for %s.\n" id - | exception Http.Entry_not_found -> - err - @@ Printf.sprintf - "Error: unable to find any bibtex entry for %s.\n\ - Check the ID before trying again.\n" - id - | exception Failure s -> err @@ Printf.sprintf "Unexpected error. %s\n" s - | exception Http.Bad_gateway -> - err - @@ Printf.sprintf - "Remote server error: wait some time and try again.\n\ - This error tends to happen when the remote servers are busy." - | exception Parser.Parse_error id -> - err - @@ Printf.sprintf - "Error: unable to parse ID: '%s'.\n\ - You can force me to consider it by prepending 'doi:', 'arxiv:' or 'PMC' as \ - appropriate." - id) - + | Some id -> ( + match Lwt_main.run (Http.get_bib_entry @@ Parser.parse_id id) with + | bibtex -> `Ok (Printf.printf "%s" bibtex) + | exception Http.PubMed_DOI_not_found -> + err @@ Printf.sprintf "Error: unable to find a DOI entry for %s.\n" id + | exception Http.Entry_not_found -> + err + @@ Printf.sprintf + "Error: unable to find any bibtex entry for %s.\n\ + Check the ID before trying again.\n" + id + | exception Failure s -> err @@ Printf.sprintf "Unexpected error. %s\n" s + | exception Http.Bad_gateway -> + err + @@ Printf.sprintf + "Remote server error: wait some time and try again.\n\ + This error tends to happen when the remote servers are busy." + | exception Parser.Parse_error id -> + err + @@ Printf.sprintf + "Error: unable to parse ID: '%s'.\n\ + You can force me to consider it by prepending 'doi:', 'arxiv:' \ + or 'PMC' as appropriate." + id) let () = let open Cmdliner in let id = let doc = - "A DOI, an arXiv ID or a PubMed ID. The tool tries to automatically infer what \ - kind of ID you are using. You can force the cli to lookup a DOI by using the form \ - 'doi:ID' or an arXiv ID by using the form 'arXiv:ID'.\n\ + "A DOI, an arXiv ID or a PubMed ID. The tool tries to automatically \ + infer what kind of ID you are using. You can force the cli to lookup a \ + DOI by using the form 'doi:ID' or an arXiv ID by using the form \ + 'arXiv:ID'.\n\ PubMed IDs always start with 'PMC'." in Arg.(value & pos 0 (some string) None & info ~docv:"ID" ~doc []) @@ -45,11 +45,16 @@ let () = let doi2bib_t = Term.(ret (const doi2bib $ id)) in let info = let doc = - "A little CLI tool to get the bibtex entry for a given DOI, arXiv or PubMed ID." + "A little CLI tool to get the bibtex entry for a given DOI, arXiv or \ + PubMed ID." in let man = - [ `S Manpage.s_bugs; `P "Report bugs to https://github.com/mseri/doi2bib/issues" ] + [ + `S Manpage.s_bugs; + `P "Report bugs to https://github.com/mseri/doi2bib/issues"; + ] in - Term.info "doi2bib" ~version:"%%VERSION%%" ~doc ~exits:Term.default_exits ~man + Term.info "doi2bib" ~version:"%%VERSION%%" ~doc ~exits:Term.default_exits + ~man in Term.exit @@ Term.eval (doi2bib_t, info) diff --git a/doi2bib.opam b/doi2bib.opam index f3d77a9..02271c1 100644 --- a/doi2bib.opam +++ b/doi2bib.opam @@ -13,7 +13,7 @@ depends: [ "astring" {>= "0.8.0"} "cohttp-lwt-unix" {>= "2.5.0"} "cmdliner" {>= "1.0.0"} - "decompress" {>= "1.4.0"} + "clz" {>= "0.1.0"} "ezxmlm" {>= "1.1.0"} "lwt" {>= "5.3.0"} "bigstringaf" {>= "0.2.0"} @@ -21,9 +21,6 @@ depends: [ "re" {>= "1.0.0"} "odoc" {with-doc} ] -conflicts: [ - "result" {< "1.5"} # uses Result.map but result can be pulled via lwt and takes over the Result module -] build: [ ["dune" "subst"] {dev} [ diff --git a/dune b/dune deleted file mode 100644 index 9486548..0000000 --- a/dune +++ /dev/null @@ -1 +0,0 @@ -(vendored_dirs vendor) diff --git a/dune-project b/dune-project index ff7d056..414d4d8 100644 --- a/dune-project +++ b/dune-project @@ -28,8 +28,8 @@ (>= 2.5.0)) (cmdliner (>= 1.0.0)) - (decompress - (>= 1.4.0)) + (clz + (>= 0.1.0)) (ezxmlm (>= 1.1.0)) (lwt diff --git a/lib/dune b/lib/dune index a65e92b..70ecfe2 100644 --- a/lib/dune +++ b/lib/dune @@ -1,4 +1,4 @@ (library (name doi2bib) - (libraries astring cohttp-lwt-unix cuz.cohttp ezxmlm lwt re unix) + (libraries astring cohttp-lwt-unix clz.cohttp ezxmlm lwt re unix) (preprocess future_syntax)) diff --git a/lib/http.ml b/lib/http.ml index f58d6c7..0a09928 100644 --- a/lib/http.ml +++ b/lib/http.ml @@ -5,61 +5,52 @@ exception Bad_gateway exception PubMed_DOI_not_found let rec get ?proxy ?headers ?fallback uri = - let headers = Cuz_cohttp.accept_gzde headers in + let headers = Clz_cohttp.update_header headers in let uri = Option.value ~default:"" proxy ^ uri |> Uri.of_string in let open Lwt.Syntax in let* resp, body = Cohttp_lwt_unix.Client.get ~headers uri in let status = Cohttp_lwt.Response.status resp in - let* () = if status <> `OK then Cohttp_lwt.Body.drain_body body else Lwt.return_unit in + let* () = + if status <> `OK then Cohttp_lwt.Body.drain_body body else Lwt.return_unit + in match status with - | `OK -> Cuz_cohttp.decompress (resp, body) - | `Found -> - let uri' = Cohttp_lwt.(resp |> Response.headers |> Cohttp.Header.get_location) in - (match uri', fallback with - | Some uri, _ -> get ?proxy ~headers ?fallback (Uri.to_string uri) - | None, Some uri -> get ?proxy ~headers uri - | None, None -> - Lwt.fail_with ("Malformed redirection trying to access '" ^ Uri.to_string uri ^ "'.")) - | d when (d = `Not_found || d = `Gateway_timeout) && Option.is_some fallback -> - (match fallback with - | Some uri -> get ?proxy ~headers uri - | None -> assert false) + | `OK -> Clz_cohttp.decompress (resp, body) + | `Found -> ( + let uri' = + Cohttp_lwt.(resp |> Response.headers |> Cohttp.Header.get_location) + in + match (uri', fallback) with + | Some uri, _ -> get ?proxy ~headers ?fallback (Uri.to_string uri) + | None, Some uri -> get ?proxy ~headers uri + | None, None -> + Lwt.fail_with + ("Malformed redirection trying to access '" ^ Uri.to_string uri + ^ "'.")) + | d when (d = `Not_found || d = `Gateway_timeout) && Option.is_some fallback + -> ( + match fallback with + | Some uri -> get ?proxy ~headers uri + | None -> assert false) | `Bad_request | `Not_found -> Lwt.fail Entry_not_found | `Bad_gateway -> Lwt.fail Bad_gateway | _ -> - Lwt.fail_with - ("Response error: '" - ^ Cohttp.Code.string_of_status status - ^ "' trying to access '" - ^ Uri.to_string uri - ^ "'.") - - -let cleanup = - let re_whsp = Re.(compile @@ seq [ bol; rep1 space ]) in - let re_endbr = Re.(compile @@ str "},") in - let re_title = Re.(compile @@ str ", title=") in - let re_last = Re.(compile @@ seq [ char '}'; rep space; char '}' ]) in - fun body -> - let body = Re.replace_string ~by:"" re_whsp body in - let body = Re.replace_string ~by:"},\n " re_endbr body in - let body = Re.replace_string ~by:",\n title=" re_title body in - Re.replace_string ~by:"}\n}" re_last body - + Lwt.fail_with + ("Response error: '" + ^ Cohttp.Code.string_of_status status + ^ "' trying to access '" ^ Uri.to_string uri ^ "'.") let bib_of_doi ?proxy doi = - let uri = "https://dx.doi.org/" ^ String.trim doi in + let uri = + "https://api.crossref.org/works/" ^ String.trim doi + ^ "/transform/application/x-bibtex" + in let headers = Cohttp.Header.of_list - [ "Accept", "text/bibliography; style=bibtex"; "charset", "utf-8" ] - in - let fallback = - "https://citation.crosscite.org/format?doi=" ^ doi ^ "&style=bibtex&lang=en-US" + [ ("Accept", "text/bibliography; style=bibtex"); ("charset", "utf-8") ] in let open Lwt.Syntax in - let* body = get ?proxy ~headers ~fallback uri in - Lwt.return (cleanup body) - + let* body = get ?proxy ~headers uri in + Lwt.return body let bib_of_arxiv ?proxy arxiv = let uri = "https://export.arxiv.org/api/query?id_list=" ^ String.trim arxiv in @@ -68,41 +59,40 @@ let bib_of_arxiv ?proxy arxiv = let _, atom_blob = Ezxmlm.from_string body in try let doi = - Ezxmlm.(atom_blob |> member "feed" |> member "entry" |> member "doi" |> to_string) + Ezxmlm.( + atom_blob |> member "feed" |> member "entry" |> member "doi" + |> to_string) in bib_of_doi ?proxy doi - with - | Ezxmlm.Tag_not_found _ -> + with Ezxmlm.Tag_not_found _ -> Lwt.catch (fun () -> get ("https://arxiv.org/bibtex/" ^ String.trim arxiv)) (fun _e -> parse_atom arxiv atom_blob |> Lwt.return) - let bib_of_pubmed ?proxy pubmed = let pubmed = String.trim pubmed in - let uri = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids=" ^ pubmed in + let uri = + "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids=" ^ pubmed + in let open Lwt.Syntax in let* body = get ?proxy uri in let _, xml_blob = Ezxmlm.from_string body in try let doi = ref "" in let _ = - Ezxmlm.filter_map - ~tag:"record" + Ezxmlm.filter_map ~tag:"record" ~f:(fun attrs node -> doi := Ezxmlm.get_attr "doi" attrs; node) xml_blob in bib_of_doi ?proxy !doi - with - | Not_found -> + with Not_found -> let exn = match Ezxmlm.( - member "pmcids" xml_blob - |> member_with_attr "record" - |> fun (a, _) -> mem_attr "status" "error" a) + member "pmcids" xml_blob |> member_with_attr "record" |> fun (a, _) -> + mem_attr "status" "error" a) with | true -> Entry_not_found | false -> PubMed_DOI_not_found @@ -110,7 +100,6 @@ let bib_of_pubmed ?proxy pubmed = in Lwt.fail exn - let get_bib_entry ?proxy = function | DOI doi -> bib_of_doi ?proxy doi | ArXiv arxiv -> bib_of_arxiv ?proxy arxiv diff --git a/lib/parser.ml b/lib/parser.ml index 7236947..720f9e0 100644 --- a/lib/parser.ml +++ b/lib/parser.ml @@ -1,7 +1,4 @@ -type id = - | DOI of string - | ArXiv of string - | PubMed of string +type id = DOI of string | ArXiv of string | PubMed of string exception Parse_error of string @@ -10,11 +7,12 @@ let string_of_id = function | ArXiv s -> "arXiv ID '" ^ s ^ "'" | PubMed s -> "PubMed ID '" ^ s ^ "'" - let parse_id id = let open Astring in let is_prefix affix s = String.is_prefix ~affix (String.Ascii.lowercase s) in - let sub start s = String.sub ~start s |> String.Sub.to_string |> String.trim in + let sub start s = + String.sub ~start s |> String.Sub.to_string |> String.trim + in let contains c s = String.exists (fun c' -> c' = c) s in match id with | doi when is_prefix "doi:" doi -> DOI (sub 4 doi) @@ -24,25 +22,24 @@ let parse_id id = | arxiv when contains '.' arxiv -> ArXiv (String.trim arxiv) | _ -> raise (Parse_error id) - let parse_atom id atom = let bibentry () = let open Ezxmlm in let entry = atom |> member "feed" |> member "entry" in let title = entry |> member "title" |> to_string in let authors = - entry - |> members "author" + entry |> members "author" |> List.map (fun n -> member "name" n |> to_string) |> String.concat " and " in let year = - try entry |> member "updated" |> to_string |> fun s -> String.sub s 0 4 with - | Tag_not_found _ -> + try entry |> member "updated" |> to_string |> fun s -> String.sub s 0 4 + with Tag_not_found _ -> entry |> member "published" |> to_string |> fun s -> String.sub s 0 4 in let cat = - entry |> member_with_attr "primary_category" |> fun (a, _) -> get_attr "term" a + entry |> member_with_attr "primary_category" |> fun (a, _) -> + get_attr "term" a in let bibid = let open Astring in @@ -55,21 +52,18 @@ let parse_atom id atom = in Printf.sprintf {|@misc{%s, - title={%s}, - author={%s}, - year={%s}, - eprint={%s}, - archivePrefix={arXiv}, - primaryClass={%s} + title={%s}, + author={%s}, + year={%s}, + eprint={%s}, + archivePrefix={arXiv}, + primaryClass={%s} }|} - bibid - title - authors - year - id - cat + bibid title authors year id cat in - try bibentry () with - | Ezxmlm.Tag_not_found t -> + try bibentry () + with Ezxmlm.Tag_not_found t -> raise - @@ Failure ("Unexpected error parsing arXiv's metadata, tag '" ^ t ^ "' not present.") + @@ Failure + ("Unexpected error parsing arXiv's metadata, tag '" ^ t + ^ "' not present.") diff --git a/tests/arxiv.t/run.t b/tests/arxiv.t/run.t index c7c9344..e2d3994 100644 --- a/tests/arxiv.t/run.t +++ b/tests/arxiv.t/run.t @@ -1,42 +1,40 @@ Arxiv entry (with DOI entry) executed with prefix $ doi2bib arXiv:1609.01724 @article{Prandi_2018, - title={Quantum confinement on non-complete Riemannian manifolds}, - volume={8}, - ISSN={1664-039X}, - url={http://dx.doi.org/10.4171/JST/226}, - DOI={10.4171/jst/226}, - number={4}, - journal={Journal of Spectral Theory}, - publisher={European Mathematical Society Publishing House}, - author={Prandi, Dario and Rizzi, Luca and Seri, Marcello}, - year={2018}, - month={Jul}, - pages={1221–1280} + doi = {10.4171/jst/226}, + url = {https://doi.org/10.4171%2Fjst%2F226}, + year = 2018, + month = {jul}, + publisher = {European Mathematical Society - {EMS} - Publishing House {GmbH}}, + volume = {8}, + number = {4}, + pages = {1221--1280}, + author = {Dario Prandi and Luca Rizzi and Marcello Seri}, + title = {Quantum confinement on non-complete Riemannian manifolds}, + journal = {Journal of Spectral Theory} } Arxiv Entry (with DOI entry) executed without prefix $ doi2bib 1902.00436 @article{Vermeeren_2019, - title={Contact variational integrators}, - volume={52}, - ISSN={1751-8121}, - url={http://dx.doi.org/10.1088/1751-8121/ab4767}, - DOI={10.1088/1751-8121/ab4767}, - number={44}, - journal={Journal of Physics A: Mathematical and Theoretical}, - publisher={IOP Publishing}, - author={Vermeeren, Mats and Bravetti, Alessandro and Seri, Marcello}, - year={2019}, - month={Oct}, - pages={445206} + doi = {10.1088/1751-8121/ab4767}, + url = {https://doi.org/10.1088%2F1751-8121%2Fab4767}, + year = 2019, + month = {oct}, + publisher = {{IOP} Publishing}, + volume = {52}, + number = {44}, + pages = {445206}, + author = {Mats Vermeeren and Alessandro Bravetti and Marcello Seri}, + title = {Contact variational integrators}, + journal = {Journal of Physics A: Mathematical and Theoretical} } Arxiv Entry (without DOI entry) and with old id $ doi2bib arXiv:math/0606217 @misc{Marklof2006Distribution, - title={Distribution modulo one and Ratner's theorem}, - author={Jens Marklof}, - year={2006}, - eprint={math/0606217}, - archivePrefix={arXiv}, - primaryClass={math.NT} + title={Distribution modulo one and Ratner's theorem}, + author={Jens Marklof}, + year={2006}, + eprint={math/0606217}, + archivePrefix={arXiv}, + primaryClass={math.NT} } diff --git a/tests/doi.t/run.t b/tests/doi.t/run.t index 02d65d1..2bc9006 100644 --- a/tests/doi.t/run.t +++ b/tests/doi.t/run.t @@ -1,31 +1,29 @@ DOI entry executed without prefix $ doi2bib 10.1007/s10569-019-9946-9 @article{Bravetti_2020, - title={Numerical integration in Celestial Mechanics: a case for contact geometry}, - volume={132}, - ISSN={1572-9478}, - url={http://dx.doi.org/10.1007/s10569-019-9946-9}, - DOI={10.1007/s10569-019-9946-9}, - number={1}, - journal={Celestial Mechanics and Dynamical Astronomy}, - publisher={Springer Science and Business Media LLC}, - author={Bravetti, Alessandro and Seri, Marcello and Vermeeren, Mats and Zadra, Federico}, - year={2020}, - month={Jan} + doi = {10.1007/s10569-019-9946-9}, + url = {https://doi.org/10.1007%2Fs10569-019-9946-9}, + year = 2020, + month = {jan}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {132}, + number = {1}, + author = {Alessandro Bravetti and Marcello Seri and Mats Vermeeren and Federico Zadra}, + title = {Numerical integration in Celestial Mechanics: a case for contact geometry}, + journal = {Celestial Mechanics and Dynamical Astronomy} } DOI entry executed with prefix $ doi2bib doi:10.4171/JST/226 @article{Prandi_2018, - title={Quantum confinement on non-complete Riemannian manifolds}, - volume={8}, - ISSN={1664-039X}, - url={http://dx.doi.org/10.4171/JST/226}, - DOI={10.4171/jst/226}, - number={4}, - journal={Journal of Spectral Theory}, - publisher={European Mathematical Society Publishing House}, - author={Prandi, Dario and Rizzi, Luca and Seri, Marcello}, - year={2018}, - month={Jul}, - pages={1221–1280} + doi = {10.4171/jst/226}, + url = {https://doi.org/10.4171%2Fjst%2F226}, + year = 2018, + month = {jul}, + publisher = {European Mathematical Society - {EMS} - Publishing House {GmbH}}, + volume = {8}, + number = {4}, + pages = {1221--1280}, + author = {Dario Prandi and Luca Rizzi and Marcello Seri}, + title = {Quantum confinement on non-complete Riemannian manifolds}, + journal = {Journal of Spectral Theory} } diff --git a/tests/pubmed.t/run.t b/tests/pubmed.t/run.t index d2014ee..db69d06 100644 --- a/tests/pubmed.t/run.t +++ b/tests/pubmed.t/run.t @@ -1,16 +1,15 @@ Pubmed entry with 'PMC' prefix. $ doi2bib PMC2883744 @article{Comas_2010, - title={Human T cell epitopes of Mycobacterium tuberculosis are evolutionarily hyperconserved}, - volume={42}, - ISSN={1546-1718}, - url={http://dx.doi.org/10.1038/ng.590}, - DOI={10.1038/ng.590}, - number={6}, - journal={Nature Genetics}, - publisher={Springer Science and Business Media LLC}, - author={Comas, Iñaki and Chakravartti, Jaidip and Small, Peter M and Galagan, James and Niemann, Stefan and Kremer, Kristin and Ernst, Joel D and Gagneux, Sebastien}, - year={2010}, - month={May}, - pages={498–503} + doi = {10.1038/ng.590}, + url = {https://doi.org/10.1038%2Fng.590}, + year = 2010, + month = {may}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {42}, + number = {6}, + pages = {498--503}, + author = {I{\~{n}}aki Comas and Jaidip Chakravartti and Peter M Small and James Galagan and Stefan Niemann and Kristin Kremer and Joel D Ernst and Sebastien Gagneux}, + title = {Human T cell epitopes of Mycobacterium tuberculosis are evolutionarily hyperconserved}, + journal = {Nature Genetics} } diff --git a/vendor/cuz/.ocamlformat b/vendor/cuz/.ocamlformat deleted file mode 100644 index 6f93117..0000000 --- a/vendor/cuz/.ocamlformat +++ /dev/null @@ -1,3 +0,0 @@ -profile=janestreet -wrap-comments=false -let-binding-spacing=sparse diff --git a/vendor/cuz/CHANGES.md b/vendor/cuz/CHANGES.md deleted file mode 100644 index 897900d..0000000 --- a/vendor/cuz/CHANGES.md +++ /dev/null @@ -1,8 +0,0 @@ -# 0.0.1 - -- Updated API to support deflate -- Update API to mimic https://github.com/tmattio/dream-encoding - -# 0.0.0 - -- First unstable release \ No newline at end of file diff --git a/vendor/cuz/LICENSE b/vendor/cuz/LICENSE deleted file mode 100644 index 5df206a..0000000 --- a/vendor/cuz/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2019 Marcello Seri - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/vendor/cuz/README.md b/vendor/cuz/README.md deleted file mode 100644 index 7dd8d3f..0000000 --- a/vendor/cuz/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Gzip compression support for cohttp-lwt using decompress - -WIP \ No newline at end of file diff --git a/vendor/cuz/cuz.opam b/vendor/cuz/cuz.opam deleted file mode 100644 index cd4dc7b..0000000 --- a/vendor/cuz/cuz.opam +++ /dev/null @@ -1,30 +0,0 @@ -# This file is generated by dune, edit dune-project instead -opam-version: "2.0" -synopsis: "Gzip compression support for cohttp-lwt using decompress" -maintainer: ["marcello.seri@gmail.com"] -authors: ["Marcello Seri"] -license: "MIT" -homepage: "https://github.com/mseri/ocaml-cuz" -doc: "https://mseri.github.io/ocaml-cuz" -bug-reports: "https://github.com/mseri/ocaml-cuz/issues" -depends: [ - "dune" {>= "2.0"} - "ocaml" {>= "4.08"} - "cohttp-lwt" {>= "2.5.0"} - "decompress" {>= "1.3.0"} -] -build: [ - ["dune" "subst"] {pinned} - [ - "dune" - "build" - "-p" - name - "-j" - jobs - "@install" - "@runtest" {with-test} - "@doc" {with-doc} - ] -] -dev-repo: "git+https://github.com/mseri/ocaml-cuz.git" diff --git a/vendor/cuz/dune-project b/vendor/cuz/dune-project deleted file mode 100644 index 82b4049..0000000 --- a/vendor/cuz/dune-project +++ /dev/null @@ -1,28 +0,0 @@ -(lang dune 2.0) - -(name cuz) - -(generate_opam_files true) - -(source - (github mseri/ocaml-cuz)) - -(license MIT) - -(authors "Marcello Seri") - -(maintainers "marcello.seri@gmail.com") - -(package - (name cuz) - (synopsis "Gzip compression support for cohttp-lwt using decompress") - (documentation "https://mseri.github.io/ocaml-cuz") - (depends - (ocaml - (>= 4.08)) - (bigstringaf - (>= 0.5.0)) - (cohttp-lwt - (>= 2.5.0)) - (decompress - (>= 1.3.0)))) diff --git a/vendor/cuz/src/cuz.ml b/vendor/cuz/src/cuz.ml deleted file mode 100644 index 3cd96d8..0000000 --- a/vendor/cuz/src/cuz.ml +++ /dev/null @@ -1,12 +0,0 @@ -open Cuz_decompress - -let inflate_string ~algorithm str = - match algorithm with - | `Deflate -> inflate_string_de str - | `Gzip -> inflate_string_gz str - - -let deflate_string ~algorithm ~cfg ?level str = - match algorithm with - | `Deflate -> deflate_string_de str - | `Gzip -> deflate_string_gz ~cfg ?level str diff --git a/vendor/cuz/src/cuz.mli b/vendor/cuz/src/cuz.mli deleted file mode 100644 index 553b30a..0000000 --- a/vendor/cuz/src/cuz.mli +++ /dev/null @@ -1,33 +0,0 @@ -(** - The implementation is mostly out of deflate documentation. - It should be possible, is somebody wants to give it a try, to abstract - the current interfaces over Cohttp and Lwt and have an implementation - that can work with streamed response bodies and that does direct output - to file. - The current basic implementation is more than enough for my limited needs. - - [deflate_string] requires an external configuration only because I have been - playing around with this also in [js_of_ocaml]. If you don't mind linking - against [unix] you can use {!Cuz_unix.cfg}, part of the [cuz.unix] sub-library. - - The [cuz.cohttp] library contains the module {!Cuz_cohttp}, which provides - some helpers to add the necessary accept headers and to decompress the response - bodies. -*) - -(** [inflate_string ~algorithm body] returns [body] compressed using [algorithm] -or the respective error message. *) -val inflate_string - : algorithm:[< `Deflate | `Gzip ] - -> string - -> (string, [> `Msg of string ]) result - -(** [deflate_string ~algorithm ~cfg ?level body] extract the content of [body] -using [algorithm] or the respective error message. If the algorithm is gzip, it -will use [cfg] and [level] for the decompression. *) -val deflate_string - : algorithm:[< `Deflate | `Gzip ] - -> cfg:unit Gz.Higher.configuration - -> ?level:int - -> string - -> string diff --git a/vendor/cuz/src/cuz_cohttp.ml b/vendor/cuz/src/cuz_cohttp.ml deleted file mode 100644 index 2c8ff60..0000000 --- a/vendor/cuz/src/cuz_cohttp.ml +++ /dev/null @@ -1,45 +0,0 @@ -exception CuzError of string - -let encoding_of_string = function - | "deflate" -> `Deflate - | "gzip" -> `Gzip - | s -> `Unknown s - - -let content_encodings s = - String.split_on_char ',' s - |> List.map (fun x -> x |> String.trim |> String.lowercase_ascii) - |> List.map encoding_of_string - |> Option.some - - -let decompress (resp, body) = - let rec aux algorithms content = - match algorithms with - | [] -> Ok content - | (`Deflate as el) :: rest | (`Gzip as el) :: rest -> - Result.bind (Cuz.inflate_string ~algorithm:el content) (aux rest) - | `Unknown d :: _rest -> Error (`Msg ("Unsopported encoding directive '" ^ d ^ "'")) - in - let open Lwt.Syntax in - let* body = Cohttp_lwt.Body.to_string body in - let algorithms = - let headers = Cohttp_lwt.Response.headers resp in - let algorithms = Cohttp.Header.get headers "content-encoding" in - Option.bind algorithms content_encodings - in - match algorithms with - | None -> Lwt.return body - | Some algorithms -> - let body = aux algorithms body in - (match body with - | Ok body -> Lwt.return body - | Error (`Msg err) -> Lwt.fail (CuzError err)) - - -let accept_gzde = - let open Cohttp.Header in - let gzip_h = of_list [ "accept-encoding", "gzip,deflate" ] in - function - | None -> gzip_h - | Some h -> add_unless_exists h "accept-encoding" "gzip,deflate" diff --git a/vendor/cuz/src/cuz_cohttp.mli b/vendor/cuz/src/cuz_cohttp.mli deleted file mode 100644 index 2844f13..0000000 --- a/vendor/cuz/src/cuz_cohttp.mli +++ /dev/null @@ -1,13 +0,0 @@ -exception CuzError of string - -(** [decompress (resp, body)] returns the contents of body, decompressed -using the information from the "content-encoding" header or fails with -[CuzError msg] if there are decompression issues or an unknown algorithm -is required. -*) -val decompress : Cohttp_lwt.Response.t * Cohttp_lwt.Body.t -> string Lwt.t - -(** [accept_gzde h] returns a new header including "accept-header:gzip,deflate" -if the "accept-header" key was not present or [h] was [None], and the unmodified - headers otherwise.*) -val accept_gzde : Cohttp.Header.t option -> Cohttp.Header.t diff --git a/vendor/cuz/src/cuz_decompress.ml b/vendor/cuz/src/cuz_decompress.ml deleted file mode 100644 index 1c5f01d..0000000 --- a/vendor/cuz/src/cuz_decompress.ml +++ /dev/null @@ -1,90 +0,0 @@ -(* - Mostly from deflate.gz documentation :) - If somebody cares, we could abstract them over Cohttp and Lwt - and have an implementation that can work with streamed response bodies - and does directly output to file. - The current dumb implementation is more than enough for my limited needs. -*) - -let inflate_string_de str = - let i = De.bigstring_create De.io_buffer_size in - let o = De.bigstring_create De.io_buffer_size in - let w = De.make_window ~bits:15 in - let r = Buffer.create 0x1000 in - let p = ref 0 in - let refill buf = - let len = min (String.length str - !p) De.io_buffer_size in - Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; - p := !p + len; - len - in - let flush buf len = - let str = Bigstringaf.substring buf ~off:0 ~len in - Buffer.add_string r str - in - match De.Higher.uncompress ~w ~refill ~flush i o with - | Ok () -> Ok (Buffer.contents r) - | Error _ as err -> err - - -let deflate_string_de str = - let i = De.bigstring_create De.io_buffer_size in - let o = De.bigstring_create De.io_buffer_size in - let w = De.Lz77.make_window ~bits:15 in - let q = De.Queue.create 0x1000 in - let r = Buffer.create 0x1000 in - let p = ref 0 in - let refill buf = - (* assert (buf == i); *) - let len = min (String.length str - !p) De.io_buffer_size in - Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; - p := !p + len; - len - in - let flush buf len = - (* assert (buf == o); *) - let str = Bigstringaf.substring buf ~off:0 ~len in - Buffer.add_string r str - in - De.Higher.compress ~w ~q ~refill ~flush i o; - Buffer.contents r - - -let inflate_string_gz str = - let i = De.bigstring_create De.io_buffer_size in - let o = De.bigstring_create De.io_buffer_size in - let r = Buffer.create 0x1000 in - let p = ref 0 in - let refill buf = - let len = min (String.length str - !p) De.io_buffer_size in - Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; - p := !p + len; - len - in - let flush buf len = - let str = Bigstringaf.substring buf ~off:0 ~len in - Buffer.add_string r str - in - Gz.Higher.uncompress ~refill ~flush i o - |> Result.map (fun _metadata -> Buffer.contents r) - - -let deflate_string_gz ~cfg ?(level = 4) str = - let i = De.bigstring_create De.io_buffer_size in - let o = De.bigstring_create De.io_buffer_size in - let w = De.Lz77.make_window ~bits:15 in - let q = De.Queue.create 0x1000 in - let r = Buffer.create 0x1000 in - let p = ref 0 in - let refill buf = - let len = min (String.length str - !p) De.io_buffer_size in - Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; - p := !p + len; - len - in - let flush buf len = - let str = Bigstringaf.substring buf ~off:0 ~len in - Buffer.add_string r str - in - Gz.Higher.compress ~level ~w ~q ~refill ~flush () cfg i o; - Buffer.contents r diff --git a/vendor/cuz/src/cuz_decompress.mli b/vendor/cuz/src/cuz_decompress.mli deleted file mode 100644 index 2adb8a9..0000000 --- a/vendor/cuz/src/cuz_decompress.mli +++ /dev/null @@ -1,4 +0,0 @@ -val inflate_string_de : string -> (string, [> `Msg of string ]) result -val deflate_string_de : string -> string -val inflate_string_gz : string -> (string, [> `Msg of string ]) result -val deflate_string_gz : cfg:unit Gz.Higher.configuration -> ?level:int -> string -> string diff --git a/vendor/cuz/src/cuz_unix.ml b/vendor/cuz/src/cuz_unix.ml deleted file mode 100644 index 179b6b7..0000000 --- a/vendor/cuz/src/cuz_unix.ml +++ /dev/null @@ -1,3 +0,0 @@ -let cfg = - let time () = Int32.of_float (Unix.gettimeofday ()) in - Gz.Higher.configuration Gz.Unix time diff --git a/vendor/cuz/src/cuz_unix.mli b/vendor/cuz/src/cuz_unix.mli deleted file mode 100644 index 6425108..0000000 --- a/vendor/cuz/src/cuz_unix.mli +++ /dev/null @@ -1 +0,0 @@ -val cfg : unit Gz.Higher.configuration diff --git a/vendor/cuz/src/dune b/vendor/cuz/src/dune deleted file mode 100644 index d6a35fd..0000000 --- a/vendor/cuz/src/dune +++ /dev/null @@ -1,20 +0,0 @@ -(library - (public_name cuz) - (name cuz) - (modules :standard \ cuz_unix cuz_cohttp) - (libraries bigstringaf cohttp-lwt decompress.gz decompress.de) - (preprocess future_syntax)) - -(library - (public_name cuz.unix) - (name cuz_unix) - (modules cuz_unix) - (libraries decompress.gz unix) - (preprocess future_syntax)) - -(library - (public_name cuz.cohttp) - (name cuz_cohttp) - (modules cuz_cohttp) - (libraries cohttp-lwt cuz lwt) - (preprocess future_syntax))