Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Maintenance] Outdated end2end evaluation script #186

Closed
JamesCao2048 opened this issue Jan 17, 2024 · 2 comments
Closed

[Maintenance] Outdated end2end evaluation script #186

JamesCao2048 opened this issue Jan 17, 2024 · 2 comments

Comments

@JamesCao2048
Copy link

JamesCao2048 commented Jan 17, 2024

Describe the feature
A clear and concise description of what the feature is.
The end2end evaluation scripts in examples/agent_examples/ seem to be copied directly from ConvLab2 and thus have some problems. For example,

  1. The RulePolicy seems not to be supported in ConvLab3 now.
  2. The default dataset of PPOPolicy in test_BERTNLU-RuleDST-PPOPolicy-TemplateNLG.py is Multiwoz, which is not supported in ConvLab3.

There is a Getting_Started.ipynb notebook at tutorials/Getting_Started.ipynb, but it only provides an interactive test, not an end2end test.
I added a user pipeline agent to it, as shown below,

from convlab.base_models.t5.nlu import T5NLU
from convlab.base_models.t5.dst import T5DST
from convlab.base_models.t5.nlg import T5NLG
from convlab.nlu.jointBERT.multiwoz import BERTNLU
from convlab.policy.vector.vector_nodes import VectorNodes
from convlab.policy.rule.multiwoz import RulePolicy
from convlab.policy.vtrace_DPT import VTRACE
from convlab.dialog_agent import PipelineAgent, BiSession
from convlab.evaluator.multiwoz_eval import MultiWozEvaluator
from convlab.nlg.template.multiwoz import TemplateNLG
from pprint import pprint
import random
import numpy as np
import torch
from convlab.util.analysis_tool.analyzer import Analyzer
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from convlab.policy.tus.multiwoz.TUS import UserPolicy
from convlab.dst.rule.multiwoz.usr_dst import UserRuleDST
import json

def set_seed(r_seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        r_seed: Seed value handed unchanged to each generator.
    """
    # Same order as before: stdlib ``random``, then NumPy, then torch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(r_seed)
    
def test_end2end(seed=20200202, n_dialogues=1000):
    """Run an end-to-end evaluation of a system agent against a simulated user.

    Builds a system ``PipelineAgent`` (T5NLU, T5DST, VTRACE policy, T5NLG) and
    a user ``PipelineAgent`` (BERTNLU, no DST, TUS ``UserPolicy``,
    TemplateNLG), seeds the random number generators and passes both agents to
    ``Analyzer.comprehensive_analyze``.

    Args:
        seed: Value passed to ``set_seed`` right before the analysis starts.
        n_dialogues: Number of dialogues to run (forwarded as ``total_dialog``).
    """
    # go to README.md of each model for more information
    sys_nlu = T5NLU(speaker='user', context_window_size=0, model_name_or_path='../../models/t5-small-nlu-multiwoz21')
    sys_dst = T5DST(dataset_name='multiwoz21', speaker='user', context_window_size=100, model_name_or_path='../../models/t5-small-dst-multiwoz21')
    # Download pre-trained DDPT model
    # ! wget https://huggingface.co/ConvLab/ddpt-policy-multiwoz21/resolve/main/supervised.pol.mdl --directory-prefix="convlab/policy/vtrace_DPT"
    vectorizer = VectorNodes(dataset_name='multiwoz21',
                            use_masking=True,
                            manually_add_entity_names=True,
                            seed=0,
                            filter_state=True)
    sys_policy = VTRACE(is_train=False,
                seed=0,
                vectorizer=vectorizer,
                load_path="convlab/policy/vtrace_DPT/supervised")
    sys_nlg = T5NLG(speaker='system', context_window_size=0, model_name_or_path='../../models/t5-small-nlg-multiwoz21')
    # assemble
    sys_agent = PipelineAgent(sys_nlu, sys_dst, sys_policy, sys_nlg, name='sys')

    # specify the user config
    user_config_path = "/mnt/sda/cjm/ConvLab-3/convlab/policy/tus/multiwoz/exp/default.json"
    user_mode = ""
    # BERT nlu trained on sys utterance
    user_nlu = BERTNLU(mode='sys', config_file='multiwoz_sys_context.json',
                       model_file='/mnt/sda/cjm/ConvLab-3/models/bert_multiwoz_sys_context.zip')
    # NOTE(review): with no user DST the policy apparently receives the raw
    # dialog-act list; the reported TypeError in TUS.predict
    # (state["system_action"] on a list) points here. UserRuleDST is imported
    # by this script and is presumably the intended tracker -- confirm.
    user_dst = None
    # Load the TUS user-policy configuration; the context manager closes the
    # file handle instead of leaking it.
    with open(user_config_path, encoding="utf-8") as config_file:
        user_config = json.load(config_file)

    if user_mode:
        # An optional mode suffix selects a model variant by name.
        user_config["model_name"] = f"{user_config['model_name']}-{user_mode}"
    user_policy = UserPolicy(user_config)
    # user_policy =  RulePolicy(character='usr')
    # template NLG
    user_nlg = TemplateNLG(is_user=True)
    # assemble
    user_agent = PipelineAgent(
        user_nlu, user_dst, user_policy, user_nlg, name='user')

    analyzer = Analyzer(user_agent=user_agent, dataset='multiwoz')

    set_seed(seed)
    # Label kept as-is even though it does not describe the pipeline built
    # above; it is passed on as the analyzer's model_name.
    name=f'BERTNLU-RuleDST-RulePolicy-TemplateNLG-Seed{seed}'
    analyzer.comprehensive_analyze(sys_agent=sys_agent, model_name=name, total_dialog=n_dialogues)
    
if __name__ == '__main__':
    # Command-line entry point: read the seed and dialogue count, then run
    # the end-to-end evaluation. Defaults are shown in the --help output.
    cli = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    cli.add_argument('--seed', type=int, default=20200202, help='Seed')
    cli.add_argument('--n_dialogues', type=int, default=1000,
                     help='Number of eval dialogues')
    options = cli.parse_args()

    test_end2end(seed=options.seed, n_dialogues=options.n_dialogues)

but got this error:

  File "/mnt/sda/cjm/ConvLab-3/examples/cjm_examples/getting_started.py", line 86, in <module>
    test_end2end(seed=args.seed, n_dialogues=args.n_dialogues)
  File "/mnt/sda/cjm/ConvLab-3/examples/cjm_examples/getting_started.py", line 77, in test_end2end
    analyzer.comprehensive_analyze(sys_agent=sys_agent, model_name=name, total_dialog=n_dialogues)
  File "/mnt/sda/cjm/ConvLab-3/convlab/util/analysis_tool/analyzer.py", line 119, in comprehensive_analyze
    sys_response, user_response, session_over, reward = sess.next_turn(
  File "/mnt/sda/cjm/ConvLab-3/convlab/dialog_agent/session.py", line 122, in next_turn
    user_response = self.next_response(last_observation)
  File "/mnt/sda/cjm/ConvLab-3/convlab/dialog_agent/session.py", line 96, in next_response
    response = next_agent.response(observation)
  File "/mnt/sda/cjm/ConvLab-3/convlab/dialog_agent/agent.py", line 176, in response
    self.output_action = deepcopy(self.policy.predict(state))
  File "/mnt/sda/cjm/ConvLab-3/convlab/policy/tus/multiwoz/TUS.py", line 420, in predict
    return self.policy.predict(state)
  File "/mnt/sda/cjm/ConvLab-3/convlab/policy/tus/multiwoz/TUS.py", line 81, in predict
    sys_dialog_act = state["system_action"]
TypeError: list indices must be integers or slices, not str

If I change the user policy to RulePolicy, I get this error instead:

Traceback (most recent call last):
  File "/mnt/sda/cjm/ConvLab-3/examples/cjm_examples/getting_started.py", line 86, in <module>
    test_end2end(seed=args.seed, n_dialogues=args.n_dialogues)
  File "/mnt/sda/cjm/ConvLab-3/examples/cjm_examples/getting_started.py", line 77, in test_end2end
    analyzer.comprehensive_analyze(sys_agent=sys_agent, model_name=name, total_dialog=n_dialogues)
  File "/mnt/sda/cjm/ConvLab-3/convlab/util/analysis_tool/analyzer.py", line 119, in comprehensive_analyze
    sys_response, user_response, session_over, reward = sess.next_turn(
  File "/mnt/sda/cjm/ConvLab-3/convlab/dialog_agent/session.py", line 124, in next_turn
    self.evaluator.add_sys_da(self.user_agent.get_in_da_eval(), self.sys_agent.dst.state['belief_state'])
  File "/mnt/sda/cjm/ConvLab-3/convlab/evaluator/multiwoz_eval.py", line 192, in add_sys_da
    if not self.booked[domain] and re.match(r'^\d{8}$', value) and \
KeyError: 'booking'

Expected behavior
End2End tests adapted to ConvLab3 are needed.

Additional context
Add any other context about the feature here.

@zqwerty
Copy link
Member

zqwerty commented Jan 21, 2024

Sorry for the late reply. Yes, the RulePolicy is not supported in ConvLab-3 because of the ontology mismatch between ConvLab-2 and ConvLab-3. Have you seen this issue (#152) on end-to-end testing of TUS?

@JamesCao2048
Copy link
Author

Thanks, this addresses my problem.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants