Commit c33eb93

Add MMLU benchmarks.

knc6 committed Feb 5, 2024
1 parent f6f699a commit c33eb93
Showing 16 changed files with 468 additions and 0 deletions.
Binary file not shown.
@@ -0,0 +1,18 @@
{
    "model_name": "itsliupeng_llama2_7b_zh",
    "project_url": "https://huggingface.co/itsliupeng/llama2_7b_zh",
    "date_submitted": "01-30-2024",
    "author_email": "[email protected]",
    "database_version": "12-12-2022",
    "team_name": "ChemNLP",
    "time_taken_seconds": {
        "AI-TextClass-quiz-mmlu_test-test-acc.csv.zip": ""
    },
    "language": "python",
    "os": "linux",
    "software_used": "jarvis-tools,numpy,scipy,torch,alignn",
    "hardware_used": "nisaba-cluster at NIST, V100 Tesla GPU",
    "git_url": [
        "https://huggingface.co/itsliupeng/llama2_7b_zh"
    ]
}
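
The metadata above follows the jarvis_leaderboard contribution layout. A minimal sanity check for such a file might look like the following sketch (the file name "metadata.json" and the required-field list are assumptions, not part of this commit):

import json

# Load a contribution's metadata and check the fields the leaderboard expects.
with open("metadata.json") as fh:
    meta = json.load(fh)
for key in ("model_name", "project_url", "team_name", "time_taken_seconds"):
    assert key in meta, f"missing field: {key}"
print("metadata OK for", meta["model_name"])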
99 changes: 99 additions & 0 deletions jarvis_leaderboard/contributions/itsliupeng_llama2_7b_zh/run.py
@@ -0,0 +1,99 @@
# conda activate chemdata
import numpy as np
import torch
from tqdm import tqdm
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    AutoModelForCausalLM,
)
from jarvis.db.jsonutils import loadjson

d = loadjson("mmlu_test.json")
device = "cpu"
if torch.cuda.is_available():
    device = torch.device("cuda")

# Models evaluated with this script; the one for this contribution is active.
# model_name = "mistralai/Mistral-7B-v0.1"
# model_name = "itsliupeng/llama2_70b_mmlu"
# model_name = "meta-llama/Llama-2-7b"
# model_name = "meta-llama/Llama-2-7b-hf"
# model_name = "meta-llama/Llama-2-13b-hf"
# model_name = "meta-llama/Llama-2-7b-chat-hf"
model_name = "itsliupeng/llama2_7b_zh"

if "t5" in model_name:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
else:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        load_in_8bit=False,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

# One CSV per contribution directory, named after the benchmark.
f = open("AI-TextClass-quiz-mmlu_test-test-acc.csv", "w")
f.write("id,target,prediction\n")
for i in tqdm(d):
    prompt = i["prompt"]
    label = i["answer"]
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(
        model.device
    )
    # Logits at the last position; the model's relative preference among
    # the answer tokens "A"-"D" is taken as its prediction.
    with torch.no_grad():
        logits = model(input_ids=input_ids).logits[0, -1]
    # .float() before softmax so the result can be converted to numpy
    # (numpy has no bfloat16).
    probs = (
        torch.nn.functional.softmax(
            torch.tensor(
                [
                    logits[tokenizer("A").input_ids[-1]],
                    logits[tokenizer("B").input_ids[-1]],
                    logits[tokenizer("C").input_ids[-1]],
                    logits[tokenizer("D").input_ids[-1]],
                ]
            ).float(),
            dim=0,
        )
        .cpu()
        .numpy()
    )
    pred = {0: "A", 1: "B", 2: "C", 3: "D"}[np.argmax(probs)]
    f.write(i["id"] + "," + label + "," + pred + "\n")
    # Free per-question tensors to keep GPU memory flat.
    del input_ids
    del logits
    del probs
f.close()
# zip AI-TextClass-quiz-mmlu_test-test-acc.csv.zip AI-TextClass-quiz-mmlu_test-test-acc.csv
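
The run.py above writes an id,target,prediction CSV. A quick way to score it locally might be the following sketch (pandas is assumed to be available; the leaderboard computes the official accuracy from the zipped CSV):

import pandas as pd

# Fraction of questions where the predicted letter matches the target.
df = pd.read_csv("AI-TextClass-quiz-mmlu_test-test-acc.csv")
acc = (df["target"] == df["prediction"]).mean()
print(f"MMLU accuracy: {acc:.4f} over {len(df)} questions")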
@@ -0,0 +1 @@
python run.py
Binary file not shown.
@@ -0,0 +1,18 @@
{
    "model_name": "meta-llama_Llama-2-7b-chat-hf",
    "project_url": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
    "date_submitted": "01-30-2024",
    "author_email": "[email protected]",
    "database_version": "12-12-2022",
    "team_name": "ChemNLP",
    "time_taken_seconds": {
        "AI-TextClass-quiz-mmlu_test-test-acc.csv.zip": ""
    },
    "language": "python",
    "os": "linux",
    "software_used": "jarvis-tools,numpy,scipy,torch,alignn",
    "hardware_used": "nisaba-cluster at NIST, V100 Tesla GPU",
    "git_url": [
        "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
    ]
}
99 changes: 99 additions & 0 deletions jarvis_leaderboard/contributions/meta-llama_Llama-2-7b-chat-hf/run.py
@@ -0,0 +1,99 @@
# conda activate chemdata
import numpy as np
import torch
from tqdm import tqdm
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    AutoModelForCausalLM,
)
from jarvis.db.jsonutils import loadjson

d = loadjson("mmlu_test.json")
device = "cpu"
if torch.cuda.is_available():
    device = torch.device("cuda")

# Models evaluated with this script; the one for this contribution is active.
# model_name = "mistralai/Mistral-7B-v0.1"
# model_name = "itsliupeng/llama2_70b_mmlu"
# model_name = "meta-llama/Llama-2-7b"
# model_name = "itsliupeng/llama2_7b_zh"
# model_name = "meta-llama/Llama-2-7b-hf"
# model_name = "meta-llama/Llama-2-13b-hf"
model_name = "meta-llama/Llama-2-7b-chat-hf"

if "t5" in model_name:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
else:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        load_in_8bit=False,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

# One CSV per contribution directory, named after the benchmark.
f = open("AI-TextClass-quiz-mmlu_test-test-acc.csv", "w")
f.write("id,target,prediction\n")
for i in tqdm(d):
    prompt = i["prompt"]
    label = i["answer"]
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(
        model.device
    )
    # Logits at the last position; the model's relative preference among
    # the answer tokens "A"-"D" is taken as its prediction.
    with torch.no_grad():
        logits = model(input_ids=input_ids).logits[0, -1]
    # .float() before softmax so the result can be converted to numpy
    # (numpy has no bfloat16).
    probs = (
        torch.nn.functional.softmax(
            torch.tensor(
                [
                    logits[tokenizer("A").input_ids[-1]],
                    logits[tokenizer("B").input_ids[-1]],
                    logits[tokenizer("C").input_ids[-1]],
                    logits[tokenizer("D").input_ids[-1]],
                ]
            ).float(),
            dim=0,
        )
        .cpu()
        .numpy()
    )
    pred = {0: "A", 1: "B", 2: "C", 3: "D"}[np.argmax(probs)]
    f.write(i["id"] + "," + label + "," + pred + "\n")
    # Free per-question tensors to keep GPU memory flat.
    del input_ids
    del logits
    del probs
f.close()
# zip AI-TextClass-quiz-mmlu_test-test-acc.csv.zip AI-TextClass-quiz-mmlu_test-test-acc.csv
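
The commented zip command at the end of run.py packages the CSV for submission. A Python equivalent, as a sketch using the standard-library zipfile module:

import zipfile

# Create AI-TextClass-quiz-mmlu_test-test-acc.csv.zip next to the CSV.
name = "AI-TextClass-quiz-mmlu_test-test-acc.csv"
with zipfile.ZipFile(name + ".zip", "w", zipfile.ZIP_DEFLATED) as zf:
    zf.write(name)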
@@ -0,0 +1 @@
python run.py
Binary file not shown.
@@ -0,0 +1,18 @@
{
    "model_name": "meta-llama_Llama-2-7b-hf",
    "project_url": "https://huggingface.co/meta-llama/Llama-2-7b-hf",
    "date_submitted": "01-30-2024",
    "author_email": "[email protected]",
    "database_version": "12-12-2022",
    "team_name": "ChemNLP",
    "time_taken_seconds": {
        "AI-TextClass-quiz-mmlu_test-test-acc.csv.zip": ""
    },
    "language": "python",
    "os": "linux",
    "software_used": "jarvis-tools,numpy,scipy,torch,alignn",
    "hardware_used": "nisaba-cluster at NIST, V100 Tesla GPU",
    "git_url": [
        "https://huggingface.co/meta-llama/Llama-2-7b-hf"
    ]
}
99 changes: 99 additions & 0 deletions jarvis_leaderboard/contributions/meta-llama_Llama-2-7b-hf/run.py
@@ -0,0 +1,99 @@
# conda activate chemdata
import numpy as np
import torch
from tqdm import tqdm
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    AutoModelForCausalLM,
)
from jarvis.db.jsonutils import loadjson

d = loadjson("mmlu_test.json")
device = "cpu"
if torch.cuda.is_available():
    device = torch.device("cuda")

# Models evaluated with this script; the one matching this contribution
# directory (meta-llama_Llama-2-7b-hf) is kept active.
# model_name = "mistralai/Mistral-7B-v0.1"
# model_name = "itsliupeng/llama2_70b_mmlu"
# model_name = "meta-llama/Llama-2-7b"
# model_name = "itsliupeng/llama2_7b_zh"
# model_name = "meta-llama/Llama-2-7b-chat-hf"
# model_name = "meta-llama/Llama-2-13b-hf"
model_name = "meta-llama/Llama-2-7b-hf"

if "t5" in model_name:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
else:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        load_in_8bit=False,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

# One CSV per contribution directory, named after the benchmark.
f = open("AI-TextClass-quiz-mmlu_test-test-acc.csv", "w")
f.write("id,target,prediction\n")
for i in tqdm(d):
    prompt = i["prompt"]
    label = i["answer"]
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(
        model.device
    )
    # Logits at the last position; the model's relative preference among
    # the answer tokens "A"-"D" is taken as its prediction.
    with torch.no_grad():
        logits = model(input_ids=input_ids).logits[0, -1]
    # .float() before softmax so the result can be converted to numpy
    # (numpy has no bfloat16).
    probs = (
        torch.nn.functional.softmax(
            torch.tensor(
                [
                    logits[tokenizer("A").input_ids[-1]],
                    logits[tokenizer("B").input_ids[-1]],
                    logits[tokenizer("C").input_ids[-1]],
                    logits[tokenizer("D").input_ids[-1]],
                ]
            ).float(),
            dim=0,
        )
        .cpu()
        .numpy()
    )
    pred = {0: "A", 1: "B", 2: "C", 3: "D"}[np.argmax(probs)]
    f.write(i["id"] + "," + label + "," + pred + "\n")
    # Free per-question tensors to keep GPU memory flat.
    del input_ids
    del logits
    del probs
f.close()
# zip AI-TextClass-quiz-mmlu_test-test-acc.csv.zip AI-TextClass-quiz-mmlu_test-test-acc.csv
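
A note on the scoring step shared by these scripts: tokenizer("A").input_ids[-1] is recomputed on every question, though the four answer-token ids never change. They can be hoisted out of the loop, and because softmax is monotonic, an argmax over the raw logits gives the same prediction as the softmax-then-argmax above. A sketch (the checkpoint name is just an example):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
choices = ["A", "B", "C", "D"]
# The last id of each encoding corresponds to the answer letter
# (preceding ids are special tokens such as BOS).
choice_ids = [tokenizer(c).input_ids[-1] for c in choices]
# Inside the loop: pred = choices[int(logits[choice_ids].argmax())]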
@@ -0,0 +1 @@
python run.py
Binary file not shown.
@@ -0,0 +1,18 @@
{
    "model_name": "mistralai_Mistral-7B-v0.1",
    "project_url": "https://huggingface.co/mistralai/Mistral-7B-v0.1",
    "date_submitted": "01-30-2024",
    "author_email": "[email protected]",
    "database_version": "12-12-2022",
    "team_name": "ChemNLP",
    "time_taken_seconds": {
        "AI-TextClass-quiz-mmlu_test-test-acc.csv.zip": ""
    },
    "language": "python",
    "os": "linux",
    "software_used": "jarvis-tools,numpy,scipy,torch,alignn",
    "hardware_used": "nisaba-cluster at NIST, V100 Tesla GPU",
    "git_url": [
        "https://huggingface.co/mistralai/Mistral-7B-v0.1"
    ]
}