Added OpenAI connector and pytest harness

Signed-off-by: Jacob Torrey <jacob@thinkst.com>
pull/6/head
Jacob Torrey 2023-05-15 13:37:16 -06:00
rodzic d1f5562602
commit a9be80e94b
2 zmienionych plików z 72 dodań i 0 usunięć

46
openai_detect.py 100644
Wyświetl plik

@ -0,0 +1,46 @@
#!/usr/bin/env python3
import os, requests
from typing import Optional, Dict, Tuple
# Model identifier that make_req sends in the request's 'model' field.
MODEL_NAME = 'model-detect-v2'
# Completions endpoint that make_req POSTs to.
API_URL = 'https://api.openai.com/v1/completions'
# Bearer token for the API, read once at import time from the
# OPENAI_API_KEY environment variable; None when the variable is unset.
API_KEY = os.environ.get('OPENAI_API_KEY')
def make_req(text : str, timeout : float = 60.0) -> Optional[Dict]:
    """Send `text` to the OpenAI completions endpoint and return the first choice.

    The text is submitted as the prompt with a score marker appended, asking
    for a single token together with its top log-probabilities.

    Args:
        text: Document to classify; must be at least 1000 characters long.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first element of the response's 'choices' list, or None when the
        input is too short, no API key is configured, the request fails, or
        the response contains no choices.
    """
    if len(text) < 1000:
        print("Input too short for OpenAI to classify")
        return None
    if not API_KEY:
        # Without this guard, 'Bearer ' + None raises an opaque TypeError.
        print("OPENAI_API_KEY is not set, unable to query OpenAI")
        return None
    headers = {
        'authorization': 'Bearer ' + API_KEY,
        'origin': 'https://platform.openai.com',
        'openai-organization': 'org-gxAZne8U4jJ8pb632XJBLH1i'
    }
    data = {
        'prompt': text + '<disc_score|>',
        'max_tokens': 1,
        'temperature': 1,
        'top_p': 1,
        'n': 1,
        'model': MODEL_NAME,
        'stream': False,
        # NOTE(review): this is a literal backslash followed by 'n', not a
        # newline character -- confirm that is what was intended.
        'stop': '\\n',
        'logprobs': 5
    }
    try:
        # requests never times out by default, so a stalled connection
        # would otherwise block the caller indefinitely.
        res = requests.post(API_URL, headers=headers, json=data, timeout=timeout)
        payload = res.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON.
        print("OpenAI request failed: " + str(err))
        return None
    if not isinstance(payload, dict):
        return None
    # A missing or empty 'choices' list means there is nothing to classify
    # with; the previous [0] lookup raised IndexError on an empty list.
    choices = payload.get('choices')
    return choices[0] if choices else None
def run_on_file(fn : str) -> Optional[Tuple[str, float]]:
    """Classify the text stored in the file `fn` via make_req.

    Args:
        fn: Path of the text file to read and classify.

    Returns:
        ('AI', score) when the returned token is '"' and ('Human', score)
        when it is '!', where score is the absolute value of the first token
        log-probability. None when make_req yields nothing or the token is
        neither marker.
    """
    with open(fn, 'r') as handle:
        text = handle.read()

    choice = make_req(text)
    if choice is None:
        print("Unable to classify!")
        return None

    token = choice.get('text')
    # Each marker token the model may emit, paired with the label it stands for.
    for marker, label in (('"', 'AI'), ('!', 'Human')):
        if token == marker:
            first_logprob = choice.get('logprobs').get('token_logprobs')[0]
            return (label, abs(first_logprob))
    return None

Wyświetl plik

@ -0,0 +1,26 @@
#!/usr/bin/env python3
import pytest, os
from warnings import warn
from openai_detect import run_on_file
AI_SAMPLE_DIR = 'samples/llm-generated/'
HUMAN_SAMPLE_DIR = 'samples/human-generated/'

# Files smaller than this many bytes are left out of the parametrized tests.
# NOTE(review): this is a size in bytes, whereas openai_detect.make_req
# rejects texts shorter than 1000 characters -- a file with multi-byte
# characters can pass this filter and still be too short to classify.
MIN_SAMPLE_BYTES = 1000


def _classifiable_files(sample_dir):
    """Return the sorted entry names in `sample_dir` of at least MIN_SAMPLE_BYTES."""
    # Sorted because os.listdir returns names in arbitrary order; a real list
    # (rather than a one-shot filter object) can be iterated more than once.
    return sorted(
        name for name in os.listdir(sample_dir)
        if os.path.getsize(sample_dir + name) >= MIN_SAMPLE_BYTES
    )


ai_files = _classifiable_files(AI_SAMPLE_DIR)
human_files = _classifiable_files(HUMAN_SAMPLE_DIR)
def test_training_file():
    """Check that 'ai-generated.txt' is classified as 'AI'."""
    result = run_on_file('ai-generated.txt')
    # run_on_file returns None when it cannot classify; fail with a readable
    # message instead of a TypeError from subscripting None.
    assert result is not None, 'The training corpus could not be classified'
    assert result[0] == 'AI', 'The training corpus should always be detected as AI-generated... since it is'
@pytest.mark.parametrize('f', human_files)
def test_human_samples(f):
    """Check that each file listed in human_files is classified as 'Human'."""
    result = run_on_file(HUMAN_SAMPLE_DIR + f)
    # run_on_file returns None when it cannot classify; fail with a readable
    # message instead of a TypeError from unpacking None.
    assert result is not None, f + ' is a human-generated file, but could not be classified'
    (classification, score) = result
    assert classification == 'Human', f + ' is a human-generated file, misclassified as AI-generated with confidence ' + str(round(score, 8))
@pytest.mark.parametrize('f', ai_files)
def test_llm_sample(f):
    """Check that each file listed in ai_files is classified as 'AI'."""
    result = run_on_file(AI_SAMPLE_DIR + f)
    # run_on_file returns None when it cannot classify; fail with a readable
    # message instead of a TypeError from unpacking None.
    assert result is not None, f + ' is an LLM-generated file, but could not be classified'
    (classification, score) = result
    assert classification == 'AI', f + ' is an LLM-generated file, misclassified as human-generated with confidence ' + str(round(score, 8))