McGill-NLP · xhluca · Jul 16, 2024 · Jul 16, 2024
diff --git a/docs/_docs/home.md b/docs/_docs/home.md
@@ -88,7 +88,7 @@ demo_names = wl.utils.load_demo_names_in_split(split_path, split='train')
 demo_names = ['saabwsg', 'ygprzve', 'iqaazif']  # 3 random demo from valid
 
 # Load the demonstrations
-demos = [wl.Demonstration(name, base_dir=base_dir) for name in names]
+demos = [wl.Demonstration(name, base_dir=base_dir) for name in demo_names]
 
 # Select a demo to work with
 demo = demos[0]
@@ -183,13 +183,13 @@ from weblinx.processing import load_candidate_elements
 
 # Download the candidates elements generated by the MiniLM-L6-dmr model
 snapshot_download(
-    repo_id="McGill-NLP/WebLINX-full", 
-    repo_type="dataset", 
-    allow_patterns="candidates/*.jsonl", 
+    repo_id="McGill-NLP/WebLINX-full",
+    repo_type="dataset",
+    allow_patterns="candidates/*.jsonl",
     local_dir="./wl_data/"
 )
 
-split = "train"  # or valid, test, test_geo, test_vis, test_web, test_cat 
+split = "train"  # or valid, test, test_geo, test_vis, test_web, test_cat
 candidates_path = f"./wl_data/candidates/{split}.jsonl"
 # Access the candidates
 candidates = load_candidate_elements(path=candidates_path)

diff --git a/modeling/README.md b/modeling/README.md
@@ -14,9 +14,9 @@ snapshot_download(
 
 # candidates files
 snapshot_download(
-    repo_id="McGill-NLP/WebLINX-full", 
-    repo_type="dataset", 
-    allow_patterns="candidates/*.jsonl", 
+    repo_id="McGill-NLP/WebLINX-full",
+    repo_type="dataset",
+    allow_patterns="candidates/*.jsonl",
     local_dir="./wl_data/"
 )
 ```
@@ -72,7 +72,7 @@ ln -s /location/of/your/full/data /location/of/project/weblinx/modeling/wl_data
 For example, if your data is located at `/mnt/research/scratch/users/jdoe/WebLINX-full` but your cloned `weblinx` repository is at `~/dev/weblinx`, then you'd run:
 
 ```bash
-ln -s /mnt/research/scratch/users/jdoe/WebLINX-full ~/dev/weblinx/modeling/wl_data
+ln -s /mnt/research/scratch/users/jdoe/WebLINX-full/* ~/dev/weblinx/modeling/wl_data
 ```
 
 Which corresponds to the `data.base_dir` specified in `config.yml`, which is `"${project_dir}/wl_data/demonstrations/"`.
@@ -122,7 +122,7 @@ The `scores.jsonl` and `results.json` files will be saved at the `cfg.eval.resul
 # Change the following paths to match your setup
 orig_dir="/path/to/weblinx/modeling/results/dmr/sentence-transformers/all-MiniLM-L6-v2"
 
-# This is the directory where the candidates are stored 
+# This is the directory where the candidates are stored
 new_dir="/path/to/wl_data/candidates"
 
 # You need to move the train split if you plan to use it for training the action model

diff --git a/modeling/requirements.txt b/modeling/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.35.0  # Future version may break the code, upgrade with caution
+transformers==4.42.3  # Future versions may break the code; upgrade with caution. Previous stable version was 4.35.0
 lxml
 numpy
 datasets
@@ -19,4 +19,5 @@ coloredlogs
 sacrebleu
 bert-score
 packaging
-ninja
+ninja
+huggingface-hub>=0.23.4, <0.24