From 0fdb97a5609b13081d0da1a59b99d374a400b9cf Mon Sep 17 00:00:00 2001 From: BeachWang <1400012807@pku.edu.cn> Date: Wed, 17 Jul 2024 13:07:43 +0800 Subject: [PATCH] Dev/rebuild sandbox (#332) * sandbox rebuild v1 * switch * fix hpo 3sigma * after pre-commit * sandbox readme zh * finish doc * other_configs -> extra_configs * other_configs -> extra_configs * res_name -> meta_name * hooker -> hook * analyze -> analyse * after pre-commit * analyse -> analyze * analyser.py -> analyzer.py * analyser.py -> analyzer.py * analyser.py -> analyzer.py * regist -> register, DICT -> MAPPING --- README.md | 8 +- README_ZH.md | 6 +- app.py | 8 +- configs/README.md | 2 +- configs/config_all.yaml | 8 +- configs/demo/analyser.yaml | 4 +- configs/demo/process.yaml | 1 + .../gpt3_data_quality_eval_config.yaml | 1 + .../demo/sandbox/gpt3_extra_train_config.json | 2 + .../demo/sandbox/gpt3_extra_train_config.yaml | 2 + configs/demo/sandbox/sandbox.yaml | 81 ++++-- data_juicer/analysis/column_wise_analysis.py | 8 +- data_juicer/analysis/diversity_analysis.py | 16 +- data_juicer/analysis/overall_analysis.py | 14 +- data_juicer/config/__init__.py | 3 +- data_juicer/config/config.py | 99 ++++--- data_juicer/core/__init__.py | 4 +- data_juicer/core/{analyser.py => analyzer.py} | 14 +- data_juicer/core/sandbox/hooks.py | 267 +++++++++++++++++ data_juicer/core/sandbox/pipelines.py | 269 +++++------------- data_juicer/utils/constant.py | 15 +- .../data/the-pile-nih-refined.jsonl | 2 +- demos/data_process_loop/app.py | 6 +- demos/data_visualization_diversity/app.py | 6 +- demos/data_visualization_op_effect/app.py | 4 +- demos/data_visualization_statistics/app.py | 4 +- demos/overview_scan/app.py | 18 +- demos/process_cft_zh_data/app.py | 6 +- demos/process_code_data/app.py | 4 +- demos/process_sci_data/app.py | 4 +- demos/process_sci_data/data/arxiv.jsonl | 4 +- docs/Sandbox-ZH.md | 108 ++++--- docs/Sandbox.md | 105 ++++--- tools/analyze_data.py | 6 +- tools/hpo/execute_hpo_3sigma.py | 30 +- tools/sandbox_starter.py | 147 ++++++---- 36 files changed, 797 insertions(+), 489 deletions(-) rename data_juicer/core/{analyser.py => analyzer.py} (94%) create mode 100644 data_juicer/core/sandbox/hooks.py diff --git a/README.md b/README.md index 980ea29b2..e50320c54 100644 --- a/README.md +++ b/README.md @@ -276,17 +276,17 @@ python tools/process_data.py --config ./demos/process_video_on_ray/configs/demo. ### Data Analysis -- Run `analyze_data.py` tool or `dj-analyze` command line tool with your config as the argument to analyse your dataset. +- Run `analyze_data.py` tool or `dj-analyze` command line tool with your config as the argument to analyze your dataset. ```shell # only for installation from source -python tools/analyze_data.py --config configs/demo/analyser.yaml +python tools/analyze_data.py --config configs/demo/analyzer.yaml # use command line tool -dj-analyze --config configs/demo/analyser.yaml +dj-analyze --config configs/demo/analyzer.yaml ``` -- **Note:** Analyser only compute stats of Filter ops. So extra Mapper or Deduplicator ops will be ignored in the analysis process. +- **Note:** Analyzer only compute stats of Filter ops. So extra Mapper or Deduplicator ops will be ignored in the analysis process. ### Data Visualization diff --git a/README_ZH.md b/README_ZH.md index f002981d8..d6dcec5ae 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -262,13 +262,13 @@ python tools/process_data.py --config ./demos/process_video_on_ray/configs/demo. 
```shell # 适用于从源码安装 -python tools/analyze_data.py --config configs/demo/analyser.yaml +python tools/analyze_data.py --config configs/demo/analyzer.yaml # 使用命令行工具 -dj-analyze --config configs/demo/analyser.yaml +dj-analyze --config configs/demo/analyzer.yaml ``` -* **注意**:Analyser 只计算 Filter 算子的状态,其他的算子(例如 Mapper 和 Deduplicator)会在分析过程中被忽略。 +* **注意**:Analyzer 只计算 Filter 算子的状态,其他的算子(例如 Mapper 和 Deduplicator)会在分析过程中被忽略。 ### 数据可视化 diff --git a/app.py b/app.py index 82499c062..622236780 100644 --- a/app.py +++ b/app.py @@ -18,7 +18,7 @@ from data_juicer.analysis.diversity_analysis import (DiversityAnalysis, get_diversity) from data_juicer.config import init_configs -from data_juicer.core import Analyser, Executor +from data_juicer.core import Analyzer, Executor from data_juicer.ops.base_op import OPERATORS from data_juicer.utils.constant import Fields, StatsKeys from data_juicer.utils.logger_utils import get_log_file_path @@ -134,7 +134,7 @@ def analyze_and_show_res(): cfg['save_stats_in_one_file'] = True logger.info('=========Stage 1: analyze original data=========') - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) dataset = analyzer.run() overall_file = os.path.join(analyzer.analysis_path, 'overall.csv') @@ -171,7 +171,7 @@ def process_and_show_res(): cfg_for_processed_data.export_path = os.path.dirname( cfg.export_path) + '_processed/data.jsonl' - analyzer = Analyser(cfg_for_processed_data) + analyzer = Analyzer(cfg_for_processed_data) analyzer.analysis_path = os.path.dirname( cfg_for_processed_data.export_path) + '/analysis' analyzer.run() @@ -460,7 +460,7 @@ def diversity(): max_value=100, step=1) - diversity_btn = st.button('Analyse_diversity', + diversity_btn = st.button('Analyze_diversity', use_container_width=True) output_path = os.path.join(os.path.dirname(cfg.export_path), 'analysis') diff --git a/configs/README.md b/configs/README.md index 873c02dc5..dd4dba3cb 100644 --- a/configs/README.md +++ b/configs/README.md @@ -7,7 +7,7 @@ This folder contains some configuration files to allow users to easily understan ```shell # To process your dataset. python tools/process_data.py --config xxx.yaml -# To analyse your dataset. +# To analyze your dataset. python tools/analyze_data.py --config xxx.yaml ``` diff --git a/configs/config_all.yaml b/configs/config_all.yaml index 470865693..41a0809a6 100644 --- a/configs/config_all.yaml +++ b/configs/config_all.yaml @@ -40,19 +40,13 @@ executor_type: default # type of executor, ray_address: auto # the address of the Ray cluster. # only for data analysis -percentiles: [0.25, 0.5, 0.75] # percentiles to analyse the dataset distribution +percentiles: [0.25, 0.5, 0.75] # percentiles to analyze the dataset distribution export_original_dataset: false # whether to export the original dataset with stats. If you only need the stats of the dataset, setting it to false could speed up the exporting. save_stats_in_one_file: false # whether to store all stats result into one file # for sandbox or hpo -model_infer_config: null # path or dict to model inference configuration file when calling model executor in sandbox. Related hooks will be disabled if it's not specified. -model_train_config: null # path or dict to model training configuration file when calling model executor in sandbox. Related hooks will be disabled if it's not specified. -model_eval_config: null # path or dict to model evaluation configuration file when calling model executor in sandbox. Related hooks will be disabled if it's not specified. 
-data_eval_config: null # path or dict to data evaluation configuration file when calling model executor in sandbox. Related hooks will be disabled if it's not specified. data_probe_algo: 'uniform' # sampling algorithm for dataset. Should be one of ["uniform", "frequency_specified_field_selector", "topk_specified_field_selector"]. It's "uniform" in default. Only used for dataset sampling. data_probe_ratio: 1.0 # the sampling ratio to the original dataset size. It's 1.0 in default. Only used for dataset sampling. -path_k_sigma_recipe: null # path to save a configuration file when using k-sigma tool to refine processing recipes -path_model_feedback_recipe: null # path to save a configuration file refined by model feedback hpo_config: null # path to a configuration file when using auto-HPO tool. diff --git a/configs/demo/analyser.yaml b/configs/demo/analyser.yaml index 3d1e1e40c..7bfef4d79 100644 --- a/configs/demo/analyser.yaml +++ b/configs/demo/analyser.yaml @@ -1,11 +1,11 @@ # Process config example for dataset # global parameters -project_name: 'demo-analyser' +project_name: 'demo-analyzer' dataset_path: './demos/data/demo-dataset.jsonl' # path to your dataset directory or file np: 4 # number of subprocess to process your dataset -export_path: './outputs/demo-analyser/demo-analyser-result.jsonl' +export_path: './outputs/demo-analyzer/demo-analyzer-result.jsonl' # process schedule # a list of several process operators with their arguments diff --git a/configs/demo/process.yaml b/configs/demo/process.yaml index 93aa95698..d2edebb71 100644 --- a/configs/demo/process.yaml +++ b/configs/demo/process.yaml @@ -12,3 +12,4 @@ export_path: './outputs/demo-process/demo-processed.jsonl' process: - language_id_score_filter: lang: 'zh' + min_score: 0.8 diff --git a/configs/demo/sandbox/gpt3_data_quality_eval_config.yaml b/configs/demo/sandbox/gpt3_data_quality_eval_config.yaml index c383a1d34..291523a85 100644 --- a/configs/demo/sandbox/gpt3_data_quality_eval_config.yaml +++ b/configs/demo/sandbox/gpt3_data_quality_eval_config.yaml @@ -1 +1,2 @@ type: dj_text_quality_classifier +dataset_path: './outputs/demo-process/demo-processed.jsonl' diff --git a/configs/demo/sandbox/gpt3_extra_train_config.json b/configs/demo/sandbox/gpt3_extra_train_config.json index a33b450ab..2330df084 100644 --- a/configs/demo/sandbox/gpt3_extra_train_config.json +++ b/configs/demo/sandbox/gpt3_extra_train_config.json @@ -1,5 +1,7 @@ { "type": "modelscope", + "dataset_path": "./outputs/demo-process/demo-processed.jsonl", + "work_dir": "./demos/data/", "model_name": "iic/nlp_gpt3_text-generation_chinese-base", "trainer_name": "nlp-base-trainer", "key_remapping": { diff --git a/configs/demo/sandbox/gpt3_extra_train_config.yaml b/configs/demo/sandbox/gpt3_extra_train_config.yaml index e8ceb084b..c6a1a20a1 100644 --- a/configs/demo/sandbox/gpt3_extra_train_config.yaml +++ b/configs/demo/sandbox/gpt3_extra_train_config.yaml @@ -1,4 +1,6 @@ type: modelscope +dataset_path: './outputs/demo-process/demo-processed.jsonl' +work_dir: './demos/data/' model_name: "iic/nlp_gpt3_text-generation_chinese-base" trainer_name: "nlp-base-trainer" key_remapping: diff --git a/configs/demo/sandbox/sandbox.yaml b/configs/demo/sandbox/sandbox.yaml index 0d8081c97..a250b4ed4 100644 --- a/configs/demo/sandbox/sandbox.yaml +++ b/configs/demo/sandbox/sandbox.yaml @@ -1,27 +1,68 @@ -# Sandbox config example for dataset +# Sandbox config example # global parameters project_name: 'demo-sandbox' -dataset_path: './demos/data/demo-dataset.jsonl' # path to your 
dataset directory or file
-np: 4                                                    # number of subprocess to process your dataset
+experiment_name: 'demo-sandbox-run0'  # for wandb tracer name
+hpo_config: null                      # path to a configuration file when using auto-HPO tool.
 
-export_path: './outputs/demo-sandbox/demo-sandbox.jsonl'
+# configs for each job, the jobs will be executed according to the order in the list
+probe_job_configs:
+  - hook: 'ProbeViaAnalyzerHook'
+    meta_name: 'analysis_ori_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+  # - hook: 'ProbeViaModelInferHook'
+  #   meta_name: 'analysis_ori_model'
+  #   dj_configs:
+  #     dataset_path: './demos/data/demo-dataset.jsonl'
+  #     export_path: './outputs/demo-sandbox/demo-sandbox.jsonl'
+  #     data_probe_algo: 'uniform'
+  #     data_probe_ratio: 0.5
+  #   extra_configs:
+  #     (...model configs)
 
-# sandbox configs
-# for refining recipe using k-sigma rules
-path_k_sigma_recipe: './outputs/demo-sandbox/k_sigma_new_recipe.yaml'
+refine_recipe_job_configs:
+  - hook: 'RefineRecipeViaKSigmaHook'
+    meta_name: 'analysis_ori_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+      path_k_sigma_recipe: './outputs/demo-process/k_sigma_new_recipe.yaml'
+  # - hook: 'RefineRecipeViaModelFeedbackHook'
+  #   meta_name:
+  #   dj_configs:
+  #   extra_configs:
+  #     (...model configs)
 
-# for gpt3 quality classifier as data evaluator
-data_eval_config: 'configs/demo/sandbox/gpt3_data_quality_eval_config.yaml'
-#data_eval_config:
-#  type: dj_text_quality_classifier
+execution_job_configs:
+  - hook: 'ProcessDataHook'
+    meta_name:
+    dj_configs: './outputs/demo-process/k_sigma_new_recipe.yaml'
+    extra_configs:
+  - hook: 'TrainModelHook'
+    meta_name:
+    dj_configs:
+    extra_configs: 'configs/demo/sandbox/gpt3_extra_train_config.json'
 
-# for gpt3 model training
-model_train_config: 'configs/demo/sandbox/gpt3_extra_train_config.json'
-
-# process schedule
-# a list of several process operators with their arguments
-process:
-  - language_id_score_filter:
-      lang: 'zh'
-      min_score: 0.5
+evaluation_job_configs:
+  - hook: 'ProbeViaAnalyzerHook'
+    meta_name: 'analysis_processed_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+  # - hook: 'ProbeViaModelInferHook'
+  #   meta_name: 'analysis_trained_model'
+  #   dj_configs:
+  #     dataset_path: './demos/data/demo-dataset.jsonl'
+  #     export_path: './outputs/demo-sandbox/demo-sandbox.jsonl'
+  #     data_probe_algo: 'uniform'
+  #     data_probe_ratio: 0.5
+  #   extra_configs:
+  #     (...model configs)
+  - hook: 'EvaluateDataHook'
+    meta_name: 'eval_data'
+    dj_configs:
+    extra_configs: 'configs/demo/sandbox/gpt3_data_quality_eval_config.yaml'
+  # - hook: 'EvaluateModelHook'
+  #   meta_name: 'eval_model'
+  #   dj_configs:
+  #   extra_configs:
+  #     (...model configs)
diff --git a/data_juicer/analysis/column_wise_analysis.py b/data_juicer/analysis/column_wise_analysis.py
index 254de3ce7..775b42683 100644
--- a/data_juicer/analysis/column_wise_analysis.py
+++ b/data_juicer/analysis/column_wise_analysis.py
@@ -62,7 +62,7 @@ def __init__(self,
         """
         Initialization method
 
-        :param dataset: the dataset to be analysed
+        :param dataset: the dataset to be analyzed
         :param output_path: path to store the analysis results
         :param overall_result: optional precomputed overall stats result
         :param save_stats_in_one_file: whether save all analysis figures of all
@@ -73,15 +73,15 @@ def __init__(self,
         if not os.path.exists(self.output_path):
             os.makedirs(self.output_path)
 
-        # if no overall description provided, analyse it from scratch
+        # if no overall description provided, analyze it from scratch
         if 
overall_result is None: oa = OverallAnalysis(dataset, output_path) - overall_result = oa.analyse() + overall_result = oa.analyze() self.overall_result = overall_result self.save_stats_in_one_file = save_stats_in_one_file - def analyse(self, show_percentiles=False, show=False, skip_export=False): + def analyze(self, show_percentiles=False, show=False, skip_export=False): """ Apply analysis and draw the analysis figure for stats. diff --git a/data_juicer/analysis/diversity_analysis.py b/data_juicer/analysis/diversity_analysis.py index 6a6a0b260..734e751a1 100644 --- a/data_juicer/analysis/diversity_analysis.py +++ b/data_juicer/analysis/diversity_analysis.py @@ -39,9 +39,9 @@ def find_root_verb_and_its_dobj_in_string(nlp, s, first_sent=True): Find the verb and its object closest to the root of lexical tree of input string. - :param nlp: the diversity model to analyse the diversity strings - :param s: the string to be analysed - :param first_sent: whether to analyse the first sentence in the + :param nlp: the diversity model to analyze the diversity strings + :param s: the string to be analyzed + :param first_sent: whether to analyze the first sentence in the input string only. If it's true, return the analysis result of the first sentence no matter it's valid or not. If it's false, return the first valid result over all sentences @@ -87,7 +87,7 @@ class DiversityAnalysis: result.""" def __init__(self, dataset, output_path, lang_or_model='en'): - """Initialization method :param dataset: the dataset to be analysed + """Initialization method :param dataset: the dataset to be analyzed :param output_path: path to store the analysis results :param lang_or_model: the diversity model or a specific language used to load the diversity model.""" @@ -104,7 +104,7 @@ def compute(self, lang_or_model=None, column_name='text'): :param lang_or_model: the diversity model or a specific language used to load the diversity model - :param column_name: the name of column to be analysed + :param column_name: the name of column to be analyzed :return: the analysis result. """ # load diversity model @@ -129,7 +129,7 @@ def find_verb_noun(sample): dataset = self.dataset.map(find_verb_noun) return pd.DataFrame(dataset) - def analyse(self, + def analyze(self, lang_or_model=None, column_name='text', postproc_func=get_diversity, @@ -139,8 +139,8 @@ def analyse(self, :param lang_or_model: the diversity model or a specific language used to load the diversity model - :param column_name: the name of column to be analysed - :param postproc_func: function to analyse diversity. In default, + :param column_name: the name of column to be analyzed + :param postproc_func: function to analyze diversity. In default, it's function get_diversity :param postproc_kwarg: arguments of the postproc_func :return: diff --git a/data_juicer/analysis/overall_analysis.py b/data_juicer/analysis/overall_analysis.py index b68b4551d..04eefb178 100644 --- a/data_juicer/analysis/overall_analysis.py +++ b/data_juicer/analysis/overall_analysis.py @@ -21,7 +21,7 @@ def __init__(self, dataset, output_path): """ Initialization method. - :param dataset: the dataset to be analysed + :param dataset: the dataset to be analyzed :param output_path: path to store the analysis results. 
""" self.stats = pd.DataFrame(dataset[Fields.stats]) @@ -29,9 +29,9 @@ def __init__(self, dataset, output_path): if not os.path.exists(self.output_path): os.makedirs(self.output_path) - # default percentiles to analyse + # default percentiles to analyze self.default_percentiles = [0.25, 0.5, 0.75] - # supported dtypes of column to be analysed + # supported dtypes of column to be analyzed # Notice: there won't be mixed types in a column because the stats is # obtained from Dataset, which doesn't allow mixed types. # Notice: for now, stats can only be: @@ -48,7 +48,7 @@ def refine_single_column(self, col): if type(first) not in self.supported_object_types: logger.warning(f'There is a column of stats with type ' f'[{type(first)}], which is not supported to be ' - f'analysed for now.') + f'analyzed for now.') return None if type(first) is str: # describe(include = 'all') can analyze the string type @@ -58,13 +58,13 @@ def refine_single_column(self, col): col = col.explode().infer_objects() return col - def analyse(self, percentiles=[], num_proc=1, skip_export=False): + def analyze(self, percentiles=[], num_proc=1, skip_export=False): """ Apply overall analysis on the whole dataset based on the describe method of pandas. - :param percentiles: percentiles to analyse - :param num_proc: number of processes to analyse the dataset + :param percentiles: percentiles to analyze + :param num_proc: number of processes to analyze the dataset :param skip_export: whether export the results to disk :return: the overall analysis result. """ diff --git a/data_juicer/config/__init__.py b/data_juicer/config/__init__.py index b33c6e755..9af053a4e 100644 --- a/data_juicer/config/__init__.py +++ b/data_juicer/config/__init__.py @@ -1,4 +1,5 @@ -from .config import export_config, init_configs, merge_config +from .config import (export_config, get_init_configs, init_configs, + merge_config, prepare_side_configs) __all__ = [ 'init_configs', diff --git a/data_juicer/config/config.py b/data_juicer/config/config.py index d05b50c3d..f252f4999 100644 --- a/data_juicer/config/config.py +++ b/data_juicer/config/config.py @@ -1,10 +1,13 @@ import copy +import json import os import shutil +import tempfile import time from argparse import ArgumentError, Namespace from typing import Dict, List, Tuple, Union +import yaml from jsonargparse import (ActionConfigFile, ArgumentParser, dict_to_namespace, namespace_to_dict) from jsonargparse.typehints import ActionTypeHint @@ -28,7 +31,7 @@ def init_configs(args=None): 4. hard-coded defaults :param args: list of params, e.g., ['--conifg', 'cfg.yaml'], defaut None. - :return: a global cfg object used by the Executor or Analyser + :return: a global cfg object used by the Executor or Analyzer """ parser = ArgumentParser(default_env=True, default_config_files=None) @@ -42,44 +45,6 @@ def init_configs(args=None): type=str, help='Path to a configuration file when using auto-HPO tool.', required=False) - parser.add_argument( - '--path_k_sigma_recipe', - type=str, - help='Path to save a configuration file when using k-sigma tool.', - required=False) - parser.add_argument( - '--path_model_feedback_recipe', - type=str, - help='Path to save a configuration file refined by model feedback.', - required=False) - parser.add_argument( - '--model_infer_config', - type=Union[str, dict], - help='Path or a dict to model inference configuration file when ' - 'calling model executor in sandbox. 
If not specified, the model ' - 'inference related hooks will be disabled.', - required=False) - parser.add_argument( - '--model_train_config', - type=Union[str, dict], - help='Path or a dict to model training configuration file when ' - 'calling model executor in sandbox. If not specified, the model ' - 'training related hooks will be disabled.', - required=False) - parser.add_argument( - '--data_eval_config', - type=Union[str, dict], - help='Path or a dict to eval configuration file when calling ' - 'auto-evaluator for data in sandbox. ' - 'If not specified, the eval related hooks will be disabled.', - required=False) - parser.add_argument( - '--model_eval_config', - type=Union[str, dict], - help='Path or a dict to eval configuration file when calling ' - 'auto-evaluator for model in sandbox. ' - 'If not specified, the eval related hooks will be disabled.', - required=False) parser.add_argument( '--data_probe_algo', type=str, @@ -117,6 +82,7 @@ def init_configs(args=None): parser.add_argument( '--dataset_path', type=str, + default='', help='Path to datasets with optional weights(0.0-1.0), 1.0 as ' 'default. Accepted format: dataset1-path dataset2-path ' ' dataset3-path ...') @@ -299,13 +265,14 @@ def init_configs(args=None): parser.add_argument( '--process', type=List[Dict], + default=[], help='List of several operators with their arguments, these ops will ' 'be applied to dataset in order') parser.add_argument( '--percentiles', type=List[float], default=[], - help='Percentiles to analyse the dataset distribution. Only used in ' + help='Percentiles to analyze the dataset distribution. Only used in ' 'Analysis.') parser.add_argument( '--export_original_dataset', @@ -410,6 +377,9 @@ def init_setup_from_cfg(cfg): cfg.dataset_dir = cfg.dataset_path else: cfg.dataset_dir = os.path.dirname(cfg.dataset_path) + elif cfg.dataset_path == '': + logger.warning('dataset_path is empty by default.') + cfg.dataset_dir = '' else: logger.warning(f'dataset_path [{cfg.dataset_path}] is not a valid ' f'local path. Please check and retry, otherwise we ' @@ -698,6 +668,8 @@ def export_config(cfg, global global_parser if not global_parser: init_configs() # enable the customized type parser + if isinstance(cfg_to_export, Namespace): + cfg_to_export = namespace_to_dict(cfg_to_export) global_parser.save(cfg=cfg_to_export, path=path, format=format, @@ -765,3 +737,50 @@ def merge_config(ori_cfg, new_cfg: Dict): except ArgumentError: logger.error('Config merge failed') + + +def prepare_side_configs(ori_config): + """ + parse the config if ori_config is a string of a config file path with + yaml, yml or json format + + :param ori_config: a config dict or a string of a config file path with + yaml, yml or json format + + :return: a config dict + """ + + if isinstance(ori_config, str): + # config path + if ori_config.endswith('.yaml') or ori_config.endswith('.yml'): + with open(ori_config) as fin: + config = yaml.safe_load(fin) + elif ori_config.endswith('.json'): + with open(ori_config) as fin: + config = json.load(fin) + else: + raise TypeError(f'Unrecognized config file type: [{ori_config}]. 
'
+                        f'Should be one of the types [".yaml", ".yml", '
+                        f'".json"].')
+    elif isinstance(ori_config, dict) or isinstance(ori_config, Namespace):
+        config = ori_config
+    else:
+        raise TypeError(
+            f'Unrecognized side config type: [{type(ori_config)}].')
+
+    return config
+
+
+def get_init_configs(cfg):
+    """
+    Set the init configs of Data-Juicer for cfg.
+    """
+    temp_dir = tempfile.gettempdir()
+    temp_file = os.path.join(temp_dir, 'job_dj_config.json')
+    if isinstance(cfg, Namespace):
+        cfg = namespace_to_dict(cfg)
+    # create a temp config file
+    with open(temp_file, 'w') as f:
+        json.dump(cfg, f)
+    inited_dj_cfg = init_configs(['--config', temp_file])
+    return inited_dj_cfg
diff --git a/data_juicer/core/__init__.py b/data_juicer/core/__init__.py
index 28a8c6d39..79ead0f8a 100644
--- a/data_juicer/core/__init__.py
+++ b/data_juicer/core/__init__.py
@@ -1,11 +1,11 @@
-from .analyser import Analyser
+from .analyzer import Analyzer
 from .data import NestedDataset
 from .executor import Executor
 from .exporter import Exporter
 from .tracer import Tracer
 
 __all__ = [
-    'Analyser',
+    'Analyzer',
     'NestedDataset',
     'Executor',
     'Exporter',
diff --git a/data_juicer/core/analyser.py b/data_juicer/core/analyzer.py
similarity index 94%
rename from data_juicer/core/analyser.py
rename to data_juicer/core/analyzer.py
index 1e5891862..42a2e331b 100644
--- a/data_juicer/core/analyser.py
+++ b/data_juicer/core/analyzer.py
@@ -15,9 +15,9 @@ from .exporter import Exporter
 
 
-class Analyser:
+class Analyzer:
     """
-    This Analyser class is used to analyse a specific dataset.
+    This Analyzer class is used to analyze a specific dataset.
 
     It will compute stats for all filter ops in the config file,
     apply multiple analyses (e.g. OverallAnalysis, ColumnWiseAnalysis, etc.)
@@ -49,7 +49,7 @@ def __init__(self, cfg=None):
                                         self.cfg.add_suffix)
 
         # prepare exporter and check export path suffix
-        # NOTICE: no need to export dataset texts for analyser
+        # NOTICE: no need to export dataset texts for analyzer
         # (export_ds=False). Instead, only need to export stats
         # (export_stats=True).
         logger.info('Preparing exporter...')
@@ -73,7 +73,7 @@ def run(self, load_data_np=None, skip_export=False):
 
         :param load_data_np: number of workers when loading the dataset.
         :param skip_export: whether export the results into disk
-        :return: analysed dataset.
+        :return: analyzed dataset.
         """
         # 1. format data
         logger.info('Loading dataset from data formatter...')
@@ -129,11 +129,11 @@ def run(self, load_data_np=None, skip_export=False):
         # 4.1. Only consider fields in Fields.stats
         # 4.2. For string fields, only consider its histogram
         # 4.3. For numeric fields, consider its histogram and box
         # 4.4. 
Otherwise, DO NOT analyze logger.info('Applying overall analysis on stats...') overall_analysis = OverallAnalysis(dataset, self.analysis_path) - self.overall_result = overall_analysis.analyse( + self.overall_result = overall_analysis.analyze( percentiles=self.cfg.percentiles, num_proc=self.cfg.np, skip_export=skip_export) @@ -147,6 +147,6 @@ def run(self, load_data_np=None, skip_export=False): overall_result=self.overall_result, save_stats_in_one_file=self.cfg.save_stats_in_one_file, ) - column_wise_analysis.analyse(skip_export=skip_export) + column_wise_analysis.analyze(skip_export=skip_export) return dataset diff --git a/data_juicer/core/sandbox/hooks.py b/data_juicer/core/sandbox/hooks.py new file mode 100644 index 000000000..d3ac97ea9 --- /dev/null +++ b/data_juicer/core/sandbox/hooks.py @@ -0,0 +1,267 @@ +import asyncio +import os + +from jsonargparse import dict_to_namespace +from loguru import logger + +from data_juicer.config import get_init_configs, prepare_side_configs +from data_juicer.core import Analyzer +from data_juicer.core import Executor as DjExecutor +from data_juicer.core.sandbox.factories import (data_evaluator_factory, + mode_infer_executor_factory, + model_evaluator_factory, + model_train_executor_factory) +from data_juicer.utils.constant import JobRequiredKeys +from tools.hpo.execute_hpo_3sigma import modify_recipe_k_sigma + + +class BaseHook: + + def __init__(self, job_cfg, watcher, *args, **kwargs): + self.job_cfg = job_cfg + self.watcher = watcher + self.meta_name = job_cfg[JobRequiredKeys.meta_name.value] + self.dj_cfg = job_cfg[JobRequiredKeys.dj_configs.value] + self.other_cfg = job_cfg[JobRequiredKeys.extra_configs.value] + + def hook(self, **kwargs): + raise NotImplementedError + + def specify_dj_and_extra_configs(self): + if self.dj_cfg: + logger.info('Parsing Data-Juicer configs in the job.') + self.dj_cfg = prepare_side_configs(self.dj_cfg) + # require Data-Juicer data process in some jobs + # so we need to init the Data-Juicer data process configs + self.inited_dj_cfg = get_init_configs(self.dj_cfg) + self.dj_cfg = dict_to_namespace(self.dj_cfg) + else: + self.inited_dj_cfg = get_init_configs({}) + if self.other_cfg: + logger.info('Parsing other configs in the job.') + self.other_cfg = prepare_side_configs(self.other_cfg) + self.other_cfg = dict_to_namespace(self.other_cfg) + + +class ProbeViaAnalyzerHook(BaseHook): + + def __init__(self, job_cfg, watcher, *args, **kwargs): + """ + Initialize the hook for probing the data via Analyzer + + :param job_cfg: the job configs + :param watcher: for watching the result + """ + super(ProbeViaAnalyzerHook, self).__init__(job_cfg, watcher, *args, + **kwargs) + + def hook(self, **kwargs): + self.specify_dj_and_extra_configs() + analyzer = Analyzer(self.inited_dj_cfg) + # probe the data via Analyzer + logger.info('Begin to analyze data') + analyzer.run() + analyzer_res = analyzer.overall_result + # drop string rows to avoid unaligned dtypes + string_rows = ['unique', 'top', 'freq'] + for row_name in string_rows: + if row_name in analyzer_res.index: + analyzer_res = analyzer_res.drop(row_name) + self.watcher.watch(analyzer_res, self.meta_name) + return kwargs + + +class ProbeViaModelInferHook(BaseHook): + + def __init__(self, job_cfg, watcher, *args, **kwargs): + """ + Initialize the hook for probing the data via Model Infer + + :param job_cfg: the job configs + :param watcher: for watching the result + """ + super(ProbeViaModelInferHook, self).__init__(job_cfg, watcher, *args, + **kwargs) + + def hook(self, 
**kwargs):
+        self.specify_dj_and_extra_configs()
+        data_executor = DjExecutor(self.inited_dj_cfg)
+        model_infer_executor = mode_infer_executor_factory(self.other_cfg)
+        # TODO
+        # probe the model (calling inference sub-pipeline) based on
+        # original data, such that we know what is the "hard" data for
+        # the model and how to process the data accordingly
+        sampled_data = data_executor.sample_data(
+            sample_ratio=self.inited_dj_cfg.data_probe_ratio,
+            sample_algo=self.inited_dj_cfg.data_probe_algo,
+        )
+        res_type, infer_res = model_infer_executor.run(
+            model_infer_executor.model_config['type'], sampled_data)
+        self.watcher.watch(infer_res, self.meta_name)
+        return kwargs
+
+
+class RefineRecipeViaKSigmaHook(BaseHook):
+
+    def __init__(self, job_cfg, watcher, *args, **kwargs):
+        """
+        Initialize the hook for refining the recipe via K Sigma
+
+        :param job_cfg: the job configs
+        :param watcher: for watching the result
+        """
+        super(RefineRecipeViaKSigmaHook,
+              self).__init__(job_cfg, watcher, *args, **kwargs)
+
+    def hook(self, **kwargs):
+        self.specify_dj_and_extra_configs()
+        path_k_sigma_recipe = self.other_cfg.path_k_sigma_recipe
+        # use k-sigma strategy to modify the data recipe
+        modify_recipe_k_sigma(self.dj_cfg, self.watcher.query(self.meta_name),
+                              path_k_sigma_recipe)
+        return kwargs
+
+
+class RefineRecipeViaModelFeedbackHook(BaseHook):
+
+    def __init__(self, job_cfg, watcher, *args, **kwargs):
+        """
+        Initialize the hook for refining the recipe via Model Feedback
+
+        :param job_cfg: the job configs
+        :param watcher: for watching the result
+        """
+        super(RefineRecipeViaModelFeedbackHook,
+              self).__init__(job_cfg, watcher, *args, **kwargs)
+
+    def hook(self, **kwargs):
+        self.specify_dj_and_extra_configs()
+        # TODO
+        # use model-feedback-based strategy to modify the data recipe,
+        # e.g., more mapper on the "hard" or "sensitive" data, those were
+        # ranked by user-interested measurement after model inference
+        if self.other_cfg.path_model_feedback_recipe is not None:
+            # modify_recipe_model_feedback(
+            #     self.other_cfg,
+            #     self.watcher.query("measure_on_infer_res"),
+            #     self.other_cfg.path_model_feedback_recipe)
+            raise NotImplementedError('Not implemented yet.')
+        return kwargs
+
+
+class ProcessDataHook(BaseHook):
+
+    def __init__(self, job_cfg, watcher, *args, **kwargs):
+        """
+        Initialize the hook for processing the data via Data-Juicer
+
+        :param job_cfg: the job configs
+        :param watcher: for watching the result
+        """
+        super(ProcessDataHook, self).__init__(job_cfg, watcher, *args,
+                                              **kwargs)
+
+    def hook(self, **kwargs):
+        self.specify_dj_and_extra_configs()
+        data_executor = DjExecutor(self.inited_dj_cfg)
+        # basic routine to process data, users can customize this freely
+        logger.info('Begin to process the data with given dj recipe')
+        data_executor.run()
+        return kwargs
+
+
+class TrainModelHook(BaseHook):
+
+    def __init__(self, job_cfg, watcher, *args, **kwargs):
+        """
+        Initialize the hook for model training
+
+        :param job_cfg: the job configs
+        :param watcher: for watching the result
+        """
+        super(TrainModelHook, self).__init__(job_cfg, watcher, *args, **kwargs)
+
+    def hook(self, **kwargs):
+        self.specify_dj_and_extra_configs()
+        model_trainer = model_train_executor_factory(self.other_cfg,
+                                                     watcher=self.watcher)
+        # basic routine to train model via the processed data,
+        # users can customize this freely
+        logger.info('Begin to train the model with given model config')
+        # update training dataset path
+        training_args = {
+            'train_dataset':
self.other_cfg.dataset_path,
+            'work_dir':
+            os.path.join(self.other_cfg.work_dir, 'model_trainer_outputs'),
+        }
+        asyncio.run(
+            model_trainer.run(model_trainer.model_config['type'],
+                              training_args, **kwargs))
+        return kwargs
+
+
+class EvaluateDataHook(BaseHook):
+
+    def __init__(self, job_cfg, watcher, *args, **kwargs):
+        """
+        Initialize the hook for data evaluation
+
+        :param job_cfg: the job configs
+        :param watcher: for watching the result
+        """
+        super(EvaluateDataHook, self).__init__(job_cfg, watcher, *args,
+                                               **kwargs)
+
+    def hook(self, **kwargs):
+        self.specify_dj_and_extra_configs()
+        data_evaluator = data_evaluator_factory(self.other_cfg)
+        # basic routine to evaluate the given data,
+        # users can customize this freely
+        logger.info('Begin to evaluate the data with given evaluator config')
+        processed_dataset = self.other_cfg.dataset_path
+        eval_res = data_evaluator.run(eval_type='data',
+                                      eval_obj=processed_dataset,
+                                      **kwargs)
+        self.watcher.watch(eval_res, self.meta_name)
+        return kwargs
+
+
+class EvaluateModelHook(BaseHook):
+
+    def __init__(self, job_cfg, watcher, *args, **kwargs):
+        """
+        Initialize the hook for model evaluation
+
+        :param job_cfg: the job configs
+        :param watcher: for watching the result
+        """
+        super(EvaluateModelHook, self).__init__(job_cfg, watcher, *args,
+                                                **kwargs)
+
+    def hook(self, **kwargs):
+        self.specify_dj_and_extra_configs()
+        model_evaluator = model_evaluator_factory(self.other_cfg)
+        # basic routine to evaluate the given model,
+        # users can customize this freely
+        logger.info('Begin to evaluate the model with given evaluator config')
+        model_evaluator.run(kwargs)
+        return kwargs
+
+
+HOOK_MAPPING = {
+    'ProbeViaAnalyzerHook': ProbeViaAnalyzerHook,
+    'ProbeViaModelInferHook': ProbeViaModelInferHook,
+    'RefineRecipeViaKSigmaHook': RefineRecipeViaKSigmaHook,
+    'RefineRecipeViaModelFeedbackHook': RefineRecipeViaModelFeedbackHook,
+    'ProcessDataHook': ProcessDataHook,
+    'TrainModelHook': TrainModelHook,
+    'EvaluateDataHook': EvaluateDataHook,
+    'EvaluateModelHook': EvaluateModelHook,
+}
+
+
+def register_hook(job_cfg, watcher):
+    if job_cfg.hook not in HOOK_MAPPING:
+        raise ValueError(f'Undefined hook: [{job_cfg.hook}].')
+    return HOOK_MAPPING[job_cfg.hook](job_cfg, watcher)
diff --git a/data_juicer/core/sandbox/pipelines.py b/data_juicer/core/sandbox/pipelines.py
index c10b51043..d07a449b0 100644
--- a/data_juicer/core/sandbox/pipelines.py
+++ b/data_juicer/core/sandbox/pipelines.py
@@ -1,22 +1,12 @@
-import asyncio
-import os.path
 from typing import List
 
 import wandb
 import yaml
 from jsonargparse import Namespace as JsonNamespace
 from jsonargparse import namespace_to_dict
-from loguru import logger
 
-from data_juicer.config import init_configs, merge_config
-from data_juicer.core import Analyser
-from data_juicer.core import Executor as DjExecutor
-from data_juicer.core.sandbox.factories import (data_evaluator_factory,
-                                                mode_infer_executor_factory,
-                                                model_evaluator_factory,
-                                                model_train_executor_factory)
-from data_juicer.utils.file_utils import add_suffix_to_filename
-from tools.hpo.execute_hpo_3sigma import modify_recipe_k_sigma
+from data_juicer.config import merge_config
+from data_juicer.core.sandbox.hooks import register_hook
 
 
 class SandBoxWatcher:
@@ -25,17 +15,18 @@ class SandBoxWatcher:
     within the sandbox based on WandB UI and its utilities.
     """
 
-    def __init__(self, dj_cfg):
+    def __init__(self, sandbox_cfg):
         """
         Initialize the watcher with a reference to an executor instance.
""" # the web-ui and experiment versioning is based on WandB - project_name = dj_cfg.project_name - hpo_config = dj_cfg.hpo_config - self.dj_cfg = dj_cfg + project_name = sandbox_cfg.project_name + experiment_name = sandbox_cfg.experiment_name + hpo_config = sandbox_cfg.hpo_config + self.sandbox_cfg = sandbox_cfg - self.wandb_run = wandb.init(project=project_name) + self.wandb_run = wandb.init(project=project_name, name=experiment_name) if (hpo_config is not None and 'metric' in hpo_config and 'name' in hpo_config['metric']): self.object_name_in_hpo = hpo_config['metric']['name'] @@ -43,13 +34,13 @@ def __init__(self, dj_cfg): self.object_name_in_hpo = None self.logged_res = {} - def query(self, res_name: str): + def query(self, meta_name: str): """ Query the result from the logged_res. """ - return self.logged_res.get(res_name) + return self.logged_res.get(meta_name) - def watch(self, res, res_name: str = ''): + def watch(self, res, meta_name: str = ''): """ Flatten the result in dot structure and log it into WandB. """ @@ -57,27 +48,27 @@ def watch(self, res, res_name: str = ''): for key, value in res.items(): # getting the left nodes of the given res dictionary. if isinstance(value, dict): - self.watch(value, f'{res_name}.{key}') + self.watch(value, f'{meta_name}.{key}') else: - self.logged_res[f'{res_name}.{key}'] = value - if self.object_name_in_hpo == f'{res_name}.{key}': + self.logged_res[f'{meta_name}.{key}'] = value + if self.object_name_in_hpo == f'{meta_name}.{key}': # Ensuring float results for HPO experiments value = float(value) - self.wandb_run.log({f'{res_name}.{key}': value}) + self.wandb_run.log({f'{meta_name}.{key}': value}) else: - self.logged_res[res_name] = res - if res_name == self.object_name_in_hpo: + self.logged_res[meta_name] = res + if meta_name == self.object_name_in_hpo: res = float(res) - self.wandb_run.log({res_name: res}) + self.wandb_run.log({meta_name: res}) def setup_sweep(self, hpo_config: dict = None, project_name: str = None): """ Setup and start a new WandB sweep. """ if hpo_config is None: - hpo_config = self.dj_cfg.hpo_config + hpo_config = self.sandbox_cfg.hpo_config if project_name is None: - project_name = self.dj_cfg.project_name + project_name = self.sandbox_cfg.project_name sweep_id = wandb.sweep(sweep=hpo_config, project=project_name) if (hpo_config is not None and 'metric' in hpo_config and 'name' in hpo_config['metric']): @@ -104,7 +95,7 @@ def watch_cfgs(self, cfgs: List[tuple] = None): for key, val in converged_cfg.items(): merged_cfgs[f'{cfg_prefix}.{key}'] = val else: - merged_cfgs = namespace_to_dict(self.dj_cfg) + merged_cfgs = namespace_to_dict(self.sandbox_cfg) wandb.config.update(merged_cfgs) @@ -123,187 +114,52 @@ class SandBoxExecutor: def __init__( self, - dj_cfg=None, - model_infer_cfg=None, - model_train_cfg=None, - data_eval_cfg=None, - model_eval_cfg=None, + cfg=None, ): """ Initialization method. - :param dj_cfg: configuration of data-juicer, - for data recipe and sandbox (e.g., HPO and leveraged tools). - :param model_infer_cfg: configuration of - an integrated model inference utility. - :param model_train_cfg: configuration of - an integrated model training utility. - :param data_eval_cfg: configuration of an - integrated auto-evaluation utility for data. - :param model_eval_cfg: configuration of an - integrated auto-evaluation utility for model. + :param cfg: configuration of sandbox. 
""" - self.dj_cfg = init_configs() if dj_cfg is None else dj_cfg - - self.watcher = SandBoxWatcher(self.dj_cfg) - self.watcher.watch_cfgs([ - (dj_cfg, 'data_juicer'), - (model_infer_cfg, 'model_infer'), - (model_train_cfg, 'model_train'), - (data_eval_cfg, 'data_eval'), - (model_eval_cfg, 'model_eval'), - ]) - - self.data_executor = DjExecutor(self.dj_cfg) - self.model_infer_executor = mode_infer_executor_factory( - model_infer_cfg) - self.model_trainer = model_train_executor_factory(model_train_cfg, - watcher=self.watcher) - self.data_evaluator = data_evaluator_factory(data_eval_cfg) - self.model_evaluator = model_evaluator_factory(model_eval_cfg) - - # default jobs to probe, refine_recipe, execution and evaluation for + self.cfg = cfg + + self.watcher = SandBoxWatcher(self.cfg) + self.watcher.watch_cfgs([(cfg, 'sandbox')]) + + # jobs to probe, refine_recipe, execution and evaluation for # interested data and model within the sandbox self.probe_jobs = [] self.refine_recipe_jobs = [] self.execution_jobs = [] self.evaluation_jobs = [] - self.register_default_jobs() - - def hook_probe_via_analyzer(self, args: dict, **kwargs): - # probe the data via Analyser - logger.info('Begin to analyze data') - analyser = Analyser(self.dj_cfg) - analyser.run() - analyser_res = analyser.overall_result - # drop string rows to avoid unaligned dtypes - string_rows = ['unique', 'top', 'freq'] - for row_name in string_rows: - if row_name in analyser_res.index: - analyser_res = analyser_res.drop(row_name) - self.watcher.watch(analyser_res, args['res_name']) - - def hook_probe_via_model_infer(self, args: dict, **kwargs): - # TODO - # probe the model (calling inference sub-pipeline) based on - # original data, such that we know what is the "hard" data for - # the model and how to process the data accordingly - if self.model_infer_executor is not None: - sampled_data = self.data_executor.sample_data( - sample_ratio=self.dj_cfg.data_probe_ratio, - sample_algo=self.dj_cfg.data_probe_algo, - ) - res_type, infer_res = self.model_infer_executor.run( - self.model_infer_executor.model_config['type'], sampled_data) - self.watcher.watch({args['res_name']: infer_res}) - - def hook_refine_recipe_via_k_sigma(self, args: dict, **kwargs): - # use k-sigma strategy to modify the data recipe - if self.dj_cfg.path_k_sigma_recipe is not None: - modify_recipe_k_sigma(self.dj_cfg, - self.watcher.query(args['res_name']), - self.dj_cfg.path_k_sigma_recipe) - - def hook_refine_recipe_via_model_feedback(self, args: dict, **kwargs): - # TODO - # use model-feedback-based strategy to modify the data recipe, - # e.g., more mapper on the "hard" or "sensitive" data, those were - # ranked by user-interested measurement after model inference - if self.dj_cfg.path_model_feedback_recipe is not None: - # modify_recipe_model_feedback( - # self.dj_cfg, - # self.watcher.query("measure_on_infer_res"), - # self.dj_cfg.path_model_feedback_recipe) - raise NotImplementedError('Not implemented yet.') - - def hook_process_data(self, args: dict, **kwargs): - # basic routine to process data, users can customize this freely - logger.info('Begin to process the data with given dj recipe') - self.data_executor.run() - # update the input dataset path to the processed dataset path - processed_ds_path = self.dj_cfg.export_path - new_analyzed_ds = add_suffix_to_filename(processed_ds_path, - '_processed') - self.dj_cfg.dataset_path = processed_ds_path - self.dj_cfg.export_path = new_analyzed_ds - - def hook_train_model(self, args: dict, **kwargs): - if not 
self.model_trainer: - return - - # basic routine to train model via the processed data, - # users can customize this freely - logger.info('Begin to train the model with given model config') - # update training dataset path - training_args = { - 'train_dataset': - self.dj_cfg.dataset_path, - 'work_dir': - os.path.join(self.dj_cfg.work_dir, 'model_trainer_outputs'), - } - asyncio.run( - self.model_trainer.run(self.model_trainer.model_config['type'], - training_args, **kwargs)) - - def hook_evaluate_data(self, args: dict, **kwargs): - if not self.data_evaluator: - return - - # basic routine to evaluate the given data, - # users can customize this freely - logger.info('Begin to evaluate the data with given evaluator config') - processed_dataset = self.dj_cfg.dataset_path - eval_res = self.data_evaluator.run(eval_type='data', - eval_obj=processed_dataset, - **kwargs) - self.watcher.watch(eval_res, args['res_name']) - - def hook_evaluate_model(self, args: dict, **kwargs): - if not self.model_evaluator: - return - - # basic routine to evaluate the given model, - # users can customize this freely - logger.info('Begin to evaluate the model with given evaluator config') - self.model_evaluator.run(kwargs) - - def register_default_jobs(self): - self.probe_jobs.append((self.hook_probe_via_analyzer, { - 'res_name': 'analysis_ori_data' - })) - self.probe_jobs.append((self.hook_probe_via_model_infer, { - 'res_name': 'analysis_ori_model' - })) - - self.refine_recipe_jobs.append((self.hook_refine_recipe_via_k_sigma, { - 'res_name': 'analysis_ori_data' - })) - self.refine_recipe_jobs.append( - (self.hook_refine_recipe_via_model_feedback, None)) - - self.execution_jobs.append((self.hook_process_data, None)) - self.execution_jobs.append((self.hook_train_model, None)) - - self.evaluation_jobs.append((self.hook_probe_via_analyzer, { - 'res_name': 'analysis_processed_data' - })) - self.evaluation_jobs.append((self.hook_probe_via_model_infer, { - 'res_name': 'analysis_trained_model' - })) - self.evaluation_jobs.append((self.hook_evaluate_data, { - 'res_name': 'eval_data' - })) - self.evaluation_jobs.append((self.hook_evaluate_model, { - 'res_name': 'eval_model' - })) + self.register_jobs() + + def register_jobs(self): + + # register probe_jobs + for job_cfg in self.cfg.probe_job_configs: + self.probe_jobs.append(register_hook(job_cfg, self.watcher)) + + # register refine_recipe_jobs + for job_cfg in self.cfg.refine_recipe_job_configs: + self.refine_recipe_jobs.append(register_hook( + job_cfg, self.watcher)) + + # register execution_jobs + for job_cfg in self.cfg.execution_job_configs: + self.execution_jobs.append(register_hook(job_cfg, self.watcher)) + + # register evaluation_jobs + for job_cfg in self.cfg.evaluation_job_configs: + self.evaluation_jobs.append(register_hook(job_cfg, self.watcher)) def run(self): """ Running the sandbox pipeline at once or in HPO style. """ - if self.dj_cfg.hpo_config is not None: + if self.cfg.hpo_config is not None: # execute_hpo_wandb contains running one_trail with HPO scheduler self.execute_hpo_wandb() else: @@ -318,26 +174,29 @@ def one_trial(self): evaluation metrics to the watcher. 
""" + # TODO: how the hpo work if self.watcher.object_name_in_hpo is not None: # merge the new hyper-parameters produced by HPO scheduler - self.dj_cfg = merge_config(self.dj_cfg, wandb.config) - self.watcher.watch_cfgs(self.dj_cfg) + self.cfg = merge_config(self.cfg, wandb.config) + self.watcher.watch_cfgs([self.cfg, 'after_hpo']) + + job_infos = {} # ====== Data & model probe ====== - for probe_hook, args in self.probe_jobs: - probe_hook(args) + for probe_hook in self.probe_jobs: + job_infos = probe_hook.hook(**job_infos) # ====== Data-model recipes iteration based on probe results ====== - for refine_job, args in self.refine_recipe_jobs: - refine_job(args) + for refine_hook in self.refine_recipe_jobs: + job_infos = refine_hook.hook(**job_infos) # ====== Data processing & model training ====== - for exec_hook, args in self.execution_jobs: - exec_hook(args) + for exec_hook in self.execution_jobs: + job_infos = exec_hook.hook(**job_infos) # ====== Evaluation on processed data or trained model ====== - for eval_hook, args in self.evaluation_jobs: - eval_hook(args) + for eval_hook in self.evaluation_jobs: + job_infos = eval_hook.hook(**job_infos) def execute_hpo_wandb(self): """ @@ -347,7 +206,7 @@ def execute_hpo_wandb(self): automatically track the results in terms of data, model and specified evaluation metrics to the watcher. """ - with open(self.dj_cfg.hpo_config) as file: + with open(self.cfg.hpo_config) as file: hpo_configuration = yaml.safe_load(file) sweep_id = self.watcher.setup_sweep(hpo_configuration) wandb.agent(sweep_id, diff --git a/data_juicer/utils/constant.py b/data_juicer/utils/constant.py index 0683c3307..13bddb687 100644 --- a/data_juicer/utils/constant.py +++ b/data_juicer/utils/constant.py @@ -2,6 +2,7 @@ import inspect import io import os +from enum import Enum import zstandard as zstd from loguru import logger @@ -93,8 +94,11 @@ def get_access_log(cls, dj_cfg=None): tmp_dj_cfg.use_cache = False tmp_dj_cfg.use_checkpoint = False - from data_juicer.core import Analyser - tmp_analyzer = Analyser(tmp_dj_cfg) + from data_juicer.config import get_init_configs + tmp_dj_cfg = get_init_configs(tmp_dj_cfg) + + from data_juicer.core import Analyzer + tmp_analyzer = Analyzer(tmp_dj_cfg) # do not overwrite the true analysis results tmp_analyzer.run(skip_export=True) @@ -205,3 +209,10 @@ class InterVars(object): # Key: {video_path}-{frame_sampling_method}[-{frame_num}] # {frame_num} is only used when {frame_sampling_method} is "uniform" sampled_frames = DEFAULT_PREFIX + 'sampled_frames' + + +class JobRequiredKeys(Enum): + hook = 'hook' + dj_configs = 'dj_configs' + meta_name = 'meta_name' + extra_configs = 'extra_configs' diff --git a/demos/data_mixture/data/the-pile-nih-refined.jsonl b/demos/data_mixture/data/the-pile-nih-refined.jsonl index 68d24d10c..ea7001c5e 100644 --- a/demos/data_mixture/data/the-pile-nih-refined.jsonl +++ b/demos/data_mixture/data/the-pile-nih-refined.jsonl @@ -5,6 +5,6 @@ {"text":"As the US population continues to diversify, early childhood programs serving the nation's youngest citizens strive to meet the needs of culturally diverse families. The current study seeks to examine the racial-ethnic socialization of preschool-age children in home and school contexts by multiple caregivers. Using cultural-ecological models as a guide, the study looks at four components within each socialization setting: parents\/teachers, family\/peers, physical environments, and the racial-ethnic composition of neighborhoods\/child care programs. 
Utilizing a proposed sample of approximately 200 three- to five-year-old children and their families that participate in Head Start programs in Upstate New York, the effects of match or mismatch of home and school racial-ethnic socialization on children's racial attitudes, and socioemotional and cognitive development will be examined. The racially and ethnically diverse population of Head Start families will allow for the definition of typologies of socialization between groups and will speak to the variation of child care needs by cultural orientation. Structural equation modeling techniques will allow for examination of the latent constructs of home socialization and school socialization via multiple informants and measures and will determine the pathways of influence between these multidimensional constructs and young children's development. The study's findings have the potential of laying bare the importance of ethnic socialization, regarded as a protective factor, in Head Start children's early academic and social development and for informing early childhood practices. Sample: A minimum of 200 families and teachers of three- to five-year-old who participate in Head Start programs and child care partnerships in Upstate New York Measures: Demographic information ? Parent Demographic survey Home racial-ethnic socialization ? Parental Racial-Ethnic Socialization Behaviors ? Family Socialization ? Africentric Home Environment Inventory ? Neighborhood Racial Composition School racial-ethnic socialization ? Teacher Demographic information ? Teachers will complete a modified version of the Parental Racial-Ethnic Socialization Behaviors measure ? Peers - observation of free play ? Early Childhood Environment Rating Scale-Revised [ECERS-R] ? Head Start Administrator School Racial Composition Racial Attitudes ? Intergroup Attitude Measure Socioemotional Development ? Child Behavior Checklist [CBCL-1 -5] Cognitive Development ? Kaufman Assessment Battery for Children [KABC-II]"} {"text":"The study will expand the research base regarding the effects of one particular early childhood education (ECE) quality improvement strategy - providing individualized instructional feedback to ECE staff on the basis of classroom observations. It will use rigorous, econometric methods to identify treatment effects- exploiting the random selection process to receive observations and feedback as part of the Ohio Department of Education (ODE) Early Language and Literacy Classroom Observation (ELLCO) study. The outcome variables of focus include both ECE staff outcomes (i.e., staff retention and participation in professional development) and child outcomes (i.e., early language and literacy skills and social-emotional development). The study is highly relevant given state efforts as well as state and federal resources expended toward improving child care quality. As state policy makers determine how to implement the I of Quality Rating and Improvement Systems (QRIS), understanding the effectiveness of particular quality improvement efforts is imperative. 
Evidence of the effectiveness of lower cost, light touch strategies is especially relevant for quality improvement efforts implemented on a statewide scale."} {"text":"This research partnership will examine the validity of the Virgin Islands' Quality Rating and Improvement System by: (1) assessing the measurement strategies and psychometric properties of measures used to assess early care and education (ECE) quality; (2) examining the effects of introducing QRIS and new licensing regulations on the supply and quality of ECE; and (3) examining the developmental trajectories of children to identify predictors of early school success in the VI context."} -{"text":"This research seeks to understand how and why households enter and exit material hardship, the extent to which federal program participation acts as a buffer, and how material hardship, not poverty, affects children and adults over the long-term. Specifically, the research will: (1) Identify how individual-level characteristics (e.g., family type, veteran status) and decisions impact the experience of material hardship. (2) Document how structural determinants at the state level (e.g., use of technology in application procedures) influence individual well-being. (3) Evaluate the consequences of material hardship over the life course (e.g., fertility, college attendance). Econometric techniques will be applied to data from the 2008 panel of the Survey of Income and Program Participation (SIPP) and longitudinal data from the Panel Study of Income Dynamics (PSID). Models of material hardship will be developed based on four domains of need: home, medical, bill-paying, and food hardship. Analyses will include descriptive analyses and individual fixed effects models."} +{"text":"This research seeks to understand how and why households enter and exit material hardship, the extent to which federal program participation acts as a buffer, and how material hardship, not poverty, affects children and adults over the long-term. Specifically, the research will: (1) Identify how individual-level characteristics (e.g., family type, veteran status) and decisions impact the experience of material hardship. (2) Document how structural determinants at the state level (e.g., use of technology in application procedures) influence individual well-being. (3) Evaluate the consequences of material hardship over the life course (e.g., fertility, college attendance). Econometric techniques will be applied to data from the 2008 panel of the Survey of Income and Program Participation (SIPP) and longitudinal data from the Panel Study of Income Dynamics (PSID). Models of material hardship will be developed based on four domains of need: home, medical, bill-paying, and food hardship. Analyzes will include descriptive analyzes and individual fixed effects models."} {"text":"Early environmental stressors are frequently experienced in low-income families and impact outcomes across the lifespan, including academic achievement, mental and physical health, and ultimately result in both earlier mortality and reduced quality of life. Characterizing specific risk trajectories and points of intervention would make more efficient and effective use of limited federal resources. The current proposal works toward these goals with four broad aims. Aim 1. To identify subpopulations with different early environmental risk profiles. 
This study will identify subpopulations of children who are characterized by different profiles of exposure to a wide range of environmental risks factors (e.g., TANF eligible but not enrolled, poverty threshold, family conflict, birth\/prenatal complications, housing instability, low maternal education, and household composition) at\/near birth, at\/near age one, and at age three using data combined from the NICHD SECCYD, ECLS-B, Baby FACES, and Fragile Families. This study will also examine duration and timing of environmental risks across the first three years of life. Aim 2. To examine the effects of early risk and support profiles on later child outcomes. This study will test the utility of early risk profiles in predicting later academic, health, and socio-emotional child outcomes. Specifically how risk profiles present at birth are predictive of age five and nine outcomes, and how the chronicity\/stability of risk profiles from birth to age three is associated with age five and nine outcomes. Further, this study will examine whether there are children who are more or less susceptible to the negative effects of early risk exposure on later child outcomes. Aim 3. Examine the potential role of modifiable maternal characteristics in the effects of early risk profiles on later child outcomes. Many interventions focus on improving parenting, and two-generation solutions suggest that improving maternal mental health and education\/job training may also help break the link between environmental adversity and negative child outcomes. This study test whether the effects of the risk profiles on outcomes are mediated by maternal depression and\/or positive parenting, hypothesizing that early adversity will contribute to maternal depression, and exacerbate poor child outcomes, while positive parenting will be protective. This study will also test whether changes in maternal education\/job training are protective. Aim 4. Examine the potential moderating role of child care factors on the relationship between risk profiles and later child outcomes. Positive early education experiences are a critical support for families experiencing environmental adversity. Conversely, long hours in low quality care can exacerbate family-level risk. Thus, this study will examine moderation of risk trajectories by child care factors. This study hypothesizes that high quality early child care will serve as a buffer against negative child outcomes, but that poor caregiver mental health, lower quality care and longer hours will exacerbate risk. Further, this study hypothesizes that child care subsidy usage will increase the likelihood of better outcomes, but only when it provides access to higher quality care."} {"text":"The Home Visiting Evidence of Effectiveness project (HomVEE), is a transparent, systematic review of the evidence base for multiple home visiting programs for pregnant women and young children birth to age 5. The project, conducted through a contract to Mathematica Policy Research, aims to improve knowledge about overall efficacy, efficacy by outcomes, information on subgroup populations, and implementation of home visiting models. The project conducts a comprehensive review of the evidence of effectiveness of home visiting program models that support children's health and development, school readiness, reductions in maltreatment, positive parenting practices, and improved family self-sufficiency (when in conjunction with child outcomes). 
Users can access the site at: ."} diff --git a/demos/data_process_loop/app.py b/demos/data_process_loop/app.py index f5307b57e..93f1a4a4b 100644 --- a/demos/data_process_loop/app.py +++ b/demos/data_process_loop/app.py @@ -7,7 +7,7 @@ from loguru import logger from data_juicer.config import init_configs -from data_juicer.core import Analyser, Executor +from data_juicer.core import Analyzer, Executor from data_juicer.ops.base_op import OPERATORS @@ -93,7 +93,7 @@ def analyze_and_show_res(): cfg['save_stats_in_one_file'] = True logger.info('=========Stage 1: analyze original data=========') - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) analyzed_dataset = analyzer.run() overall_file = os.path.join(analyzer.analysis_path, 'overall.csv') @@ -132,7 +132,7 @@ def process_and_show_res(): cfg_for_processed_data.export_path = os.path.dirname( cfg.export_path) + '_processed/data.jsonl' - analyzer = Analyser(cfg_for_processed_data) + analyzer = Analyzer(cfg_for_processed_data) analyzer.analysis_path = os.path.dirname( cfg_for_processed_data.export_path) + '/analysis' analyzer.run() diff --git a/demos/data_visualization_diversity/app.py b/demos/data_visualization_diversity/app.py index 5950a6d1a..aae9491f0 100644 --- a/demos/data_visualization_diversity/app.py +++ b/demos/data_visualization_diversity/app.py @@ -8,7 +8,7 @@ from data_juicer.analysis.diversity_analysis import (DiversityAnalysis, get_diversity) from data_juicer.config import init_configs -from data_juicer.core import Analyser +from data_juicer.core import Analyzer from data_juicer.ops.base_op import OPERATORS from data_juicer.utils.model_utils import prepare_model, get_model @@ -105,7 +105,7 @@ def load_dataset(dataset_file): del_file = True logger.info('=========Stage: analyze original data=========') - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) dataset = analyzer.formatter.load_dataset() if del_file: @@ -181,7 +181,7 @@ def diversity(): value=0, max_value=100, step=1) - diversity_btn = st.button('Start to analyse Verb-Noun diversity', + diversity_btn = st.button('Start to analyze Verb-Noun diversity', use_container_width=True) with st.expander('Diversity Results ', expanded=True): diff --git a/demos/data_visualization_op_effect/app.py b/demos/data_visualization_op_effect/app.py index 9e4ff4928..65ea6ed58 100644 --- a/demos/data_visualization_op_effect/app.py +++ b/demos/data_visualization_op_effect/app.py @@ -12,7 +12,7 @@ import yaml from data_juicer.config import init_configs -from data_juicer.core import Analyser +from data_juicer.core import Analyzer from data_juicer.ops.base_op import OPERATORS from data_juicer.utils.constant import Fields, StatsKeys @@ -105,7 +105,7 @@ def analyze_and_show_res(dataset_file): cfg.dataset_path = dataset_file.name del_file = True - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) dataset = analyzer.run() overall_file = os.path.join(analyzer.analysis_path, 'overall.csv') diff --git a/demos/data_visualization_statistics/app.py b/demos/data_visualization_statistics/app.py index abae47096..1fd1c8e53 100644 --- a/demos/data_visualization_statistics/app.py +++ b/demos/data_visualization_statistics/app.py @@ -6,7 +6,7 @@ from loguru import logger from data_juicer.config import init_configs -from data_juicer.core import Analyser +from data_juicer.core import Analyzer from data_juicer.ops.base_op import OPERATORS @@ -90,7 +90,7 @@ def analyze_and_show_res(dataset_file): cfg.dataset_path = dataset_file.name del_file = True - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) dataset = 
analyzer.run() overall_file = os.path.join(analyzer.analysis_path, 'overall.csv') diff --git a/demos/overview_scan/app.py b/demos/overview_scan/app.py index 06cb18033..ae69dfc6d 100644 --- a/demos/overview_scan/app.py +++ b/demos/overview_scan/app.py @@ -6,7 +6,7 @@ import streamlit as st from data_juicer.config import init_configs -from data_juicer.core import Analyser +from data_juicer.core import Analyzer from data_juicer.format.formatter import FORMATTERS from data_juicer.ops.base_op import OPERATORS @@ -21,11 +21,11 @@ features_desc = ''' - **Broad Range of Operators**: Equipped with 50+ core :blue[operators (OPs)], including `Formatters`, `Mappers`, `Filters`, `Deduplicators`, and beyond. -- **Specialized Toolkits**: Feature-rich specialized toolkits such as `Text Quality Classifier`, `Dataset Splitter`, `Analysers`, `Evaluators`, and more that elevate your dataset handling capabilities. +- **Specialized Toolkits**: Feature-rich specialized toolkits such as `Text Quality Classifier`, `Dataset Splitter`, `Analyzers`, `Evaluators`, and more that elevate your dataset handling capabilities. - **Systematic & Reusable**: Empowering users with a systematic library of reusable `config recipes` and `OPs`, designed to function independently of specific datasets, models, or tasks. -- **Data-in-the-loop**: Allowing detailed data analyses with an automated report generation feature for a deeper understanding of your dataset. Coupled with timely multi-dimension automatic evaluation capabilities, it supports a feedback loop at multiple stages in the LLM development process. +- **Data-in-the-loop**: Allowing detailed data analyzes with an automated report generation feature for a deeper understanding of your dataset. Coupled with timely multi-dimension automatic evaluation capabilities, it supports a feedback loop at multiple stages in the LLM development process. - **Comprehensive Processing Recipes**: Offering tens of `pre-built data processing recipes` for pre-training, CFT, en, zh, and more scenarios. @@ -47,13 +47,13 @@ ``` ### Data Analysis -- Run `analyze_data.py` tool with your config as the argument to analyse your dataset. +- Run `analyze_data.py` tool with your config as the argument to analyze your dataset. ```shell -python tools/analyze_data.py --config configs/demo/analyser.yaml +python tools/analyze_data.py --config configs/demo/analyzer.yaml ``` -- **Note:** Analyser only compute stats of Filter ops. So extra Mapper or Deduplicator ops will be ignored in the analysis process. +- **Note:** Analyzer only compute stats of Filter ops. So extra Mapper or Deduplicator ops will be ignored in the analysis process. ### Data Visualization @@ -73,7 +73,7 @@ ```shell # To process your dataset. python tools/process_data.py --config xxx.yaml -# To analyse your dataset. +# To analyze your dataset. 
+# To analyze your dataset. python tools/analyze_data.py --config xxx.yaml ``` ''' @@ -121,7 +121,7 @@ def extract_op_desp(markdown_text, header): def run_demo(): - config_file = os.path.join(project_path, 'configs/demo/analyser.yaml') + config_file = os.path.join(project_path, 'configs/demo/analyzer.yaml') data_path = os.path.join(demo_path, 'data/demo-dataset.jsonl') st.markdown(f'dataset: `{data_path}`') start_btn = st.button(' Start to analyze', use_container_width=True) @@ -134,7 +134,7 @@ def run_demo(): cfg['save_stats_in_one_file'] = True if start_btn: - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) with st.spinner('Wait for analyze...'): analyzer.run() diff --git a/demos/process_cft_zh_data/app.py b/demos/process_cft_zh_data/app.py index d4d1aae0c..b30c188c7 100644 --- a/demos/process_cft_zh_data/app.py +++ b/demos/process_cft_zh_data/app.py @@ -7,7 +7,7 @@ from loguru import logger from data_juicer.config import init_configs -from data_juicer.core import Analyser, Executor +from data_juicer.core import Analyzer, Executor from data_juicer.utils.constant import HashKeys demo_path = os.path.dirname(os.path.abspath(__file__)) @@ -96,8 +96,8 @@ def process_and_show_res(): cfg.open_tracer = True cfg.np = 1 cfg.process.pop(0) - logger.info('=========Stage 1: analyse original data=========') - analyzer = Analyser(cfg) + logger.info('=========Stage 1: analyze original data=========') + analyzer = Analyzer(cfg) analyzed_dataset = analyzer.run() logger.info('=========Stage 2: process original data=========') diff --git a/demos/process_code_data/app.py b/demos/process_code_data/app.py index 26b8a6606..418956e23 100644 --- a/demos/process_code_data/app.py +++ b/demos/process_code_data/app.py @@ -7,7 +7,7 @@ from loguru import logger from data_juicer.config import init_configs -from data_juicer.core import Analyser, Executor +from data_juicer.core import Analyzer, Executor from data_juicer.utils.constant import HashKeys demo_path = os.path.dirname(os.path.abspath(__file__)) @@ -97,7 +97,7 @@ def process_and_show_res(): cfg.open_tracer = True cfg.np = 1 logger.info('=========Stage 1: analyze original data=========') - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) analyzed_dataset = analyzer.run() logger.info('=========Stage 2: process original data=========') diff --git a/demos/process_sci_data/app.py b/demos/process_sci_data/app.py index 3bef7de88..64f7c931c 100644 --- a/demos/process_sci_data/app.py +++ b/demos/process_sci_data/app.py @@ -7,7 +7,7 @@ from loguru import logger from data_juicer.config import init_configs -from data_juicer.core import Analyser, Executor +from data_juicer.core import Analyzer, Executor from data_juicer.utils.constant import HashKeys demo_path = os.path.dirname(os.path.abspath(__file__)) @@ -95,7 +95,7 @@ def process_and_show_res(): cfg.open_tracer = True cfg.np = 1 logger.info('=========Stage 1: analyze original data=========') - analyzer = Analyser(cfg) + analyzer = Analyzer(cfg) analyzed_dataset = analyzer.run() logger.info('=========Stage 2: process original data=========') diff --git a/demos/process_sci_data/data/arxiv.jsonl b/demos/process_sci_data/data/arxiv.jsonl index 10b80ea7d..1221350d1 100644 --- a/demos/process_sci_data/data/arxiv.jsonl +++ b/demos/process_sci_data/data/arxiv.jsonl @@ -6,6 +6,6 @@ {"text":"\\section{Water hexamers}\n\\label{}\n\nAll molecular geometries are in in xyz format, with the coordinates\ngiven in Angstroms.\n\n\\begin{small}\n\\begin{verbatim}\n18\nBook hexamer\nO 0.274 1.424 1.149\nH 0.301 2.281 1.584\nH 1.065 1.395 0.561\nO -0.182 -1.395 
1.046\nH -1.007 -1.426 0.528\nH -0.094 -0.465 1.303\nO -1.922 1.452 -0.590\nH -1.725 2.060 -1.310\nH -1.152 1.512 0.006\nO -2.533 -1.178 -0.486\nH -3.310 -1.150 0.071\nH -2.345 -0.235 -0.685\nO 2.359 1.101 -0.549\nH 2.137 1.586 -1.351\nH 2.265 0.152 -0.753\nO 2.003 -1.665 -0.565\nH 2.686 -1.978 0.028\nH 1.187 -1.641 -0.017\n\n18\nCage hexamer\nO 0.668 -1.715 -0.324\nH 0.888 -2.653 -0.324\nH 1.530 -1.248 -0.246\nO -0.600 0.461 -1.631\nH -1.491 0.364 -1.254\nH -0.184 -0.399 -1.455\nO 0.704 1.769 0.353\nH 0.776 2.712 0.212\nH 0.238 1.400 -0.435\nO -0.833 -0.335 1.654\nH -0.335 0.487 1.561\nH -0.312 -0.969 1.129\nO 2.829 -0.072 -0.059\nH 3.390 -0.383 0.659\nH 2.313 0.674 0.286\nO -2.928 0.016 -0.061\nH -3.279 -0.858 -0.230\nH -2.258 -0.115 0.647\n\n18\nPrism hexamer\nO -1.409 -0.377 1.473\nH -2.071 -0.903 1.937\nH -0.529 -0.766 1.643\nO -1.599 -0.612 -1.295\nH -2.396 -1.046 -1.619\nH -1.654 -0.691 -0.322\nO -0.931 1.906 -0.094\nH -1.269 1.374 -0.827\nH -1.226 1.389 0.673\nO 1.263 -1.095 1.417\nH 1.596 -0.202 1.266\nH 1.243 -1.449 0.511\nO 1.173 -1.205 -1.425\nH 1.471 -0.300 -1.275\nH 0.212 -1.123 -1.545\nO 1.709 1.371 -0.093\nH 2.214 2.175 -0.205\nH 0.760 1.640 -0.097\n\n18\nRing hexamer\nO -1.228 2.029 0.652\nH -1.731 1.307 0.229\nH -1.025 1.699 1.538\nO -1.145 -2.077 0.650\nH -0.962 -1.739 1.536\nH -0.268 -2.152 0.228\nO 1.159 2.229 -0.654\nH 0.294 2.210 -0.185\nH 0.953 1.985 -1.557\nO 1.351 -2.120 -0.652\nH 1.246 -1.819 -1.556\nH 1.768 -1.362 -0.183\nO 2.374 0.049 0.650\nH 1.999 0.845 0.226\nH 1.991 0.040 1.536\nO -2.512 -0.110 -0.654\nH -2.063 -0.850 -0.185\nH -2.195 -0.166 -1.556\n\\end{verbatim}\n\\end{small}\n\n\\section{6-aug-cc-pVDZ basis set}\n\\label{}\n\n\\begin{small}\n\\begin{verbatim}\n-H 0\n S 3 1.00\n 13.0100000 0.0196850\n 1.9620000 0.1379770\n 0.4446000 0.4781480\n S 1 1.00\n 0.1220000 1.0000000\n S 1 1.00\n 0.0297400 1.0000000\n S 1 1.00\n 0.0072500 1.0000000\n S 1 1.00\n 0.0017674 1.0000000\n S 1 1.00\n 0.0004309 1.0000000\n S 1 1.00\n 0.0001051 1.0000000\n S 1 1.00\n 0.0000256 1.0000000\n P 1 1.00\n 0.7270000 1.0000000\n P 1 1.00\n 0.1410000 1.0000000\n P 1 1.00\n 0.0273000 1.0000000\n P 1 1.00\n 0.0052857 1.0000000\n P 1 1.00\n 0.0010234 1.0000000\n P 1 1.00\n 0.0001981 1.0000000\n P 1 1.00\n 0.0000383 1.0000000\n ****\n-O 0\n S 8 1.00\n 11720.0000000 0.0007100\n 1759.0000000 0.0054700\n 400.8000000 0.0278370\n 113.7000000 0.1048000\n 37.0300000 0.2830620\n 13.2700000 0.4487190\n 5.0250000 0.2709520\n 1.0130000 0.0154580\n S 8 1.00\n 11720.0000000 -0.0001600\n 1759.0000000 -0.0012630\n 400.8000000 -0.0062670\n 113.7000000 -0.0257160\n 37.0300000 -0.0709240\n 13.2700000 -0.1654110\n 5.0250000 -0.1169550\n 1.0130000 0.5573680\n S 1 1.00\n 0.3023000 1.0000000\n S 1 1.00\n 0.0789600 1.0000000\n S 1 1.00\n 0.0206000 1.0000000\n S 1 1.00\n 0.0053744 1.0000000\n S 1 1.00\n 0.0014021 1.0000000\n S 1 1.00\n 0.0003658 1.0000000\n S 1 1.00\n 0.0000954 1.0000000\n P 3 1.00\n 17.7000000 0.0430180\n 3.8540000 0.2289130\n 1.0460000 0.5087280\n P 1 1.00\n 0.2753000 1.0000000\n P 1 1.00\n 0.0685600 1.0000000\n P 1 1.00\n 0.0171000 1.0000000\n P 1 1.00\n 0.0042650 1.0000000\n P 1 1.00\n 0.0010638 1.0000000\n P 1 1.00\n 0.0002653 1.0000000\n P 1 1.00\n 0.0000662 1.0000000\n D 1 1.00\n 1.1850000 1.0000000\n D 1 1.00\n 0.3320000 1.0000000\n D 1 1.00\n 0.0930000 1.0000000\n D 1 1.00\n 0.0260512 1.0000000\n D 1 1.00\n 0.0072975 1.0000000\n D 1 1.00\n 0.0020442 1.0000000\n D 1 1.00\n 0.0000573 1.0000000\n ****\n\\end{verbatim}\n\\end{small}\n\n\\section{q-aug-cc-pVTZ basis 
set}\n\\label{}\n\n\\begin{small}\n\\begin{verbatim}\n-H 0\n S 3 1.00\n 33.8700000 0.0060680\n 5.0950000 0.0453080\n 1.1590000 0.2028220\n S 1 1.00\n 0.3258000 1.0000000\n S 1 1.00\n 0.1027000 1.0000000\n S 1 1.00\n 0.0252600 1.0000000\n S 1 1.00\n 0.0062100 1.0000000\n S 1 1.00\n 0.0015267 1.0000000\n S 1 1.00\n 0.0003753 1.0000000\n P 1 1.00\n 1.4070000 1.0000000\n P 1 1.00\n 0.3880000 1.0000000\n P 1 1.00\n 0.1020000 1.0000000\n P 1 1.00\n 0.0268000 1.0000000\n P 1 1.00\n 0.0070416 1.0000000\n P 1 1.00\n 0.0018502 1.0000000\n D 1 1.00\n 1.0570000 1.0000000\n D 1 1.00\n 0.2470000 1.0000000\n D 1 1.00\n 0.0577000 1.0000000\n D 1 1.00\n 0.0134789 1.0000000\n D 1 1.00\n 0.0031487 1.0000000\n ****\n-O 0\n S 8 1.00\n 15330.0000000 0.0005080\n 2299.0000000 0.0039290\n 522.4000000 0.0202430\n 147.3000000 0.0791810\n 47.5500000 0.2306870\n 16.7600000 0.4331180\n 6.2070000 0.3502600\n 0.6882000 -0.0081540\n S 8 1.00\n 15330.0000000 -0.0001150\n 2299.0000000 -0.0008950\n 522.4000000 -0.0046360\n 147.3000000 -0.0187240\n 47.5500000 -0.0584630\n 16.7600000 -0.1364630\n 6.2070000 -0.1757400\n 0.6882000 0.6034180\n S 1 1.00\n 1.7520000 1.0000000\n S 1 1.00\n 0.2384000 1.0000000\n S 1 1.00\n 0.0737600 1.0000000\n S 1 1.00\n 0.0228000 1.0000000\n S 1 1.00\n 0.0070477 1.0000000\n S 1 1.00\n 0.0021785 1.0000000\n P 3 1.00\n 34.4600000 0.0159280\n 7.7490000 0.0997400\n 2.2800000 0.3104920\n P 1 1.00\n 0.7156000 1.0000000\n P 1 1.00\n 0.2140000 1.0000000\n P 1 1.00\n 0.0597400 1.0000000\n P 1 1.00\n 0.0167000 1.0000000\n P 1 1.00\n 0.0046684 1.0000000\n P 1 1.00\n 0.0013050 1.0000000\n D 1 1.00\n 2.3140000 1.0000000\n D 1 1.00\n 0.6450000 1.0000000\n D 1 1.00\n 0.2140000 1.0000000\n D 1 1.00\n 0.0710000 1.0000000\n D 1 1.00\n 0.0235561 1.0000000\n D 1 1.00\n 0.0078153 1.0000000\n F 1 1.00\n 1.4280000 1.0000000\n F 1 1.00\n 0.5000000 1.0000000\n F 1 1.00\n 0.1750000 1.0000000\n F 1 1.00\n 0.0612500 1.0000000\n F 1 1.00\n 0.0214375 1.0000000\n ****\n\\end{verbatim}\n\\end{small}\n\n\\end{appendices}\n\n\n\\end{document}","meta":{"arxiv_id":"1711.10948","language":"en","source":"arxiv","timestamp":1512033080000,"url":"https:\/\/arxiv.org\/abs\/1711.10948","yymm":"1711"}} {"text":"\\section{The pentagon-wheel cocycle}\n\\hangindent=-6.5cm\\hangafter=-3%\n{\\unitlength=1mm\n\\begin{picture}(0,0)(-32,2.5)\n\\put(65,0){$\\boldsymbol{\\gamma}_5 = 
{}$}\n\\put(85,0){\n{\\unitlength=0.3mm\n\\begin{picture}(55,53)(5,-5\n\\put(27.5,8.5){\\circle*{3}}\n\\put(0,29.5){\\circle*{3}}\n\\put(-27.5,8.5){\\circle*{3}}\n\\put(-17.5,-23.75){\\circle*{3}}\n\\put(17.5,-23.75){\\circle*{3}}\n\\qbezie\n(27.5,8.5)(0,29.5)(0,29.5)\n\\qbezie\n(0,29.5)(-27.5,8.5)(-27.5,8.5)\n\\qbezie\n(-27.5,8.5)(-17.5,-23.75)(-17.5,-23.75)\n\\qbezie\n(-17.5,-23.75)(17.5,-23.75)(17.5,-23.75)\n\\qbezie\n(17.5,-23.75)(27.5,8.5)(27.5,8.5)\n\\put(0,0){\\circle*{3}}\n\\qbezie\n(27.5,8.5)(0,0)(0,0)\n\\qbezie\n(0,29.5)(0,0)(0,0)\n\\qbezie\n(-27.5,8.5)(0,0)(0,0)\n\\qbezie\n(-17.5,-23.75)(0,0)(0,0)\n\\qbezie\n(17.5,-23.75)(0,0)(0,0)\n\\end{picture}\n}\n}\n\\put(95,0){${}+\\dfrac{5}{2}$}\n\\put(114,0){\n{\\unitlength=0.4mm\n\\begin{picture}(50,30)(0,-4\n\\put(12,0){\\circle*{2.5}}\n\\put(-12,0){\\circle*{2.5}}\n\\put(25,15){\\circle*{2.5}}\n\\put(-25,15){\\circle*{2.5}}\n\\put(-25,-15){\\circle*{2.5}}\n\\put(25,-15){\\circle*{2.5}}\n\\put(-12,0){\\line(1,0){24}}\n\\put(-25,15){\\line(1,0){50}}\n\\put(-25,-15){\\line(1,0){50}}\n\\put(-25,-15){\\line(0,1){32}}\n\\put(25,15){\\line(0,-1){32}}\n\\qbezie\n(25,15)(12,0)(12,0)\n\\qbezie\n(-25,15)(-12,0)(-12,0)\n\\qbezie\n(-25,-15)(-12,0)(-12,0)\n\\qbezie\n(25,-15)(12,0)(12,0)\n\\put(-12.5,17){\\oval(25,10)[t]}\n\\put(12.5,-17){\\oval(25,10)[b]}\n\\put(0,2){\\line(0,1){11}}\n\\put(0,-2){\\line(0,-1){11}}\n\\end{picture}\n}%\n}\n\\end{picture}%\n}%\n\\label{FigPentagon}%\nNow consider the pentagon\\\/-\\\/wheel cocycle\n$\\boldsymbol{\\gamma}_5 \\in \\ker \\Id$,\nsee~\\cite{JNMP17}.\nBy orienting both graphs in $\\boldsymbol{\\gamma}_5$ (i.e.\\ by shifting the vertex labelling by $+1 = m-1$, adding two edges to the sinks $\\mathsf{0}$,\\ $\\mathsf{1}$, and keeping only those oriented graphs out of $1024 = 2^{\\text{\\#edges}}$ which are built from $\\xleftarrow{}{\\bullet} \\xrightarrow{}$) and skew\\\/-\\\/symmetrizing with respect to $\\mathsf{0} \\rightleftarrows \\mathsf{1}$, we obtain $91$ parameters for\nKontsevich graphs on $2$ sinks, $6$ internal vertices, and $12$ ($=6$ pairs) of edges.\nWe take the sum $\\mathcal{Q}$ of these $91$ bi-\\\/vector graphs (or skew differences of\nKontsevich graphs)\nwith their undetermined coefficients, and for the set of tri\\\/-\\\/vector graphs occurring in $\\schouten{\\mathcal{P},\\mathcal{Q}}$, we generate all the possibly\nneeded tri\\\/-\\\/vector ``Leibniz'' graphs with $\\schouten{\\mathcal{P},\\mathcal{P}}$ inside.\\footnote{%\nThe algorithm from~\\cite[\\S1.2]{JPCS17} produces 41031\nLeibniz graphs in $\\nu=3$ \niterations and 56509\nat~$\\nu\\geqslant7$.}\nThis yields 41031\nsuch Leibniz graphs,\nwhich, with undetermined coefficients, provide the ansatz for the r.-h.s.\\ of the factorization problem\n\\label{EqFactor}\n$\\schouten{\\mathcal{P},\\mathcal{Q}(\\mathcal{P})} = \\Diamond\\bigl(\\mathcal{P},\\schouten{\\mathcal{P},\\mathcal{P}}\\bigr)$.\nThis gives us an inhomogeneous system of 463,344\nlinear algebraic equations for both the coefficients in $\\mathcal{Q}$ and~$\\Diamond$.\nIn its l.-h.s., we fix the coefficient of one bi\\\/-\\\/vector graph\\footnote{This is done because it is anticipated that, counting the number of ways to obtain a given bi\\\/-\\\/vector while orienting the nonzero cocycle~$\\boldsymbol{\\gamma}_5$, none of the coefficients in a solution~$\\mathcal{Q}_5$ vanishes.}\nby setting it to~${\\mathbf{+2}}$.\n\n\\begin{claim}For~$\\boldsymbol{\\gamma}_5$,\nthe factorization problem $\\schouten{\\mathcal{P},\\mathcal{Q}(\\mathcal{P})} = 
\\Diamond(\\mathcal{P},\\schouten{\\mathcal{P},\\mathcal{P}})$ has a solution $(\\mathcal{Q}_5, \\Diamond_5)$\\textup{;} the sum $\\mathcal{Q}_5$ of $167$ Kontsevich graphs \\textup{(}on $m=2$ sinks $\\mathsf{0},\\mathsf{1}$ and $n=6$ internal vertices $\\mathsf{2}$\\textup{,} $\\ldots$\\textup{,} $\\mathsf{7}$\\textup{)} with integer coefficients is given in the table\nbelow\n\\footnote{%\nThe analytic formula of degree\\\/-\\\/six nonlinear differential polynomial $\\mathcal{Q}_5(\\mathcal{P})$ is given in App.~\\ref{AppFormula}.\nThe encoding of $\n691$ Leibniz tri\\\/-\\\/vector graphs containing the Jacobiator $\\schouten{\\mathcal{P},\\mathcal{P}}$ for the Poisson structure $\\mathcal{P}$ that occur in the r.-h.s.\\ $\\Diamond(\\mathcal{P}, \\schouten{\\mathcal{P},\\mathcal{P}})$ is available at \\texttt{}. \nThe machine format to encode such graphs (with one tri\\\/-\\\/valent vertex for the Jacobiator) is explained\nin~\\cite{JPCS17} (see also~\\cite{f16,cpp}).}%\n\\end{claim}\n\n{\\tiny\\centerline\n\\begin{tabular}{l|r|}\n0 1 2 4 2 5 3 6 4 7 2 4 & $10$\\\\\n0 1 2 4 2 5 2 6 4 7 3 4 & $-10$\\\\\n0 3 1 4 2 5 6 7 2 4 3 4 & $10 $\\\\\n0 3 4 5 1 2 6 7 2 3 3 4 & $-10$\\\\\n0 3 1 4 2 5 2 6 4 7 3 4 & $10 $\\\\\n0 3 4 5 1 2 4 6 3 7 2 3 & $-10$\\\\\n0 3 1 4 2 5 3 6 4 7 2 4 & $-10$\\\\\n0 3 4 5 1 2 2 6 3 7 3 4 & $-10$\\\\\n0 3 1 4 5 6 2 3 5 7 2 5 & $-10$\\\\\n0 3 4 5 2 6 4 7 1 2 4 6 & $10 $\\\\\n0 3 4 5 1 6 2 4 5 7 2 5 & $10 $\\\\\n0 3 4 5 2 6 4 6 1 7 2 4 & $-10$\\\\\n0 3 4 5 2 6 4 7 2 7 1 4 & $-10$\\\\\n0 3 4 5 1 6 2 4 3 7 2 3 & $10 $\\\\\n0 3 4 5 2 6 6 7 1 3 2 3 & $-10$\\\\\n0 3 4 5 2 6 2 7 1 3 3 6 & $10 $\\\\\n0 3 4 5 1 6 4 7 2 3 2 3 & $-10$\\\\\n0 3 4 5 1 5 2 6 2 7 4 5 & $10 $\\\\\n0 3 4 5 1 6 2 7 2 3 3 4 & $10 $\\\\\n0 3 4 5 1 5 2 6 4 7 2 5 & $10 $\\\\\n0 3 4 5 1 2 4 6 4 7 2 4 & $-10$\\\\\n0 3 1 4 2 5 2 6 2 7 2 3 & $-10$\\\\\n0 3 1 4 2 5 3 6 3 7 2 3 & $-10\n\\end{tabular}\n\\hskip 10pt\n\\begin{tabular}{l|r|}\n0 3 4 5 1 2 2 6 2 7 2 4 & $-10$\\\\\n0 3 1 4 5 6 2 3 3 7 2 3 & $-10$\\\\\n0 3 4 5 2 6 2 7 1 2 2 6 & $10$\\\\\n0 1 2 4 2 5 2 6 2 7 2 3 & $\\mathbf{2}$\\\\\n0 1 2 4 2 5 2 6 3 7 3 4 & $-5$\\\\\n0 1 2 4 2 5 3 6 3 7 2 4 & $5 $\\\\\n0 1 2 4 2 5 2 6 3 7 4 5 & $-5 $\\\\\n0 1 2 4 2 5 2 6 4 7 3 5 & $-5 $\\\\\n0 3 1 4 5 6 2 7 5 7 2 3 & $5 $\\\\\n0 3 4 5 5 6 6 7 2 7 1 2 & $5 $\\\\\n0 3 1 4 2 5 6 7 2 4 3 6 & $5 $\\\\\n0 3 4 5 1 2 6 7 2 7 3 4 & $-5 $\\\\\n0 3 1 4 2 5 2 6 3 7 4 5 & $5 $\\\\\n0 3 4 5 1 2 4 6 2 7 3 5 & $-5 $\\\\\n0 3 1 4 2 5 2 6 4 7 3 5 & $5 $\\\\\n0 3 4 5 1 2 4 6 3 7 2 5 & $-5 $\\\\\n0 3 4 5 1 2 6 7 2 3 4 6 & $5 $\\\\\n0 3 1 4 2 5 6 7 2 7 3 4 & $5 $\\\\\n0 3 4 5 1 2 2 6 4 7 3 5 & $5 $\\\\\n0 3 1 4 2 5 3 6 2 7 4 5 & $-5 $\\\\\n0 3 4 5 1 2 2 6 3 7 4 5 & $5 $\\\\\n0 3 1 4 2 5 3 6 4 7 2 5 & $-5 $\\\\\n0 3 4 5 2 6 6 7 1 2 3 4 & $5 $\n\\end{tabular}\n\\hskip 10pt\n\\begin{tabular}{l|r|}\n0 3 1 4 5 6 2 3 2 7 4 5 & $5 $\\\\\n0 3 4 5 2 6 4 7 1 2 3 6 & $5 $\\\\\n0 3 1 4 5 6 2 3 5 7 2 4 & $-5 $\\\\\n0 3 4 5 1 2 6 7 2 4 4 6 & $-5 $\\\\\n0 3 1 4 2 5 6 7 2 3 2 6 & $-5 $\\\\\n0 3 1 4 5 6 2 3 5 7 2 3 & $-5 $\\\\\n0 3 4 5 2 6 4 7 1 2 2 6 & $5 $\\\\\n0 3 1 4 2 5 6 7 2 3 3 4 & $5 $\\\\\n0 3 4 5 1 2 6 7 2 3 2 4 & $-5 $\\\\\n0 3 1 4 2 5 3 6 4 7 2 3 & $-5 $\\\\\n0 3 4 5 1 2 2 6 3 7 2 4 & $-5 $\\\\\n0 3 1 4 2 5 6 7 2 3 3 6 & $-5 $\\\\\n0 3 4 5 1 2 6 7 2 4 2 6 & $-5 $\\\\\n0 3 4 5 1 2 6 7 2 4 3 4 & $-5 $\\\\\n0 3 1 4 2 5 6 7 2 3 2 4 & $5 $\\\\\n0 3 4 5 1 2 4 6 3 7 2 4 & $-5 $\\\\\n0 3 1 4 2 5 2 6 4 7 2 3 & $-5 $\\\\\n0 1 2 4 2 5 6 7 2 7 3 4 & $-5 $\\\\\n0 1 2 4 2 5 3 6 2 7 4 5 & $5 $\\\\\n0 1 2 4 2 5 3 
6 4 7 2 5 & $5 $\\\\\n0 1 2 4 2 5 3 6 2 7 3 5 & $5 $\\\\\n0 1 2 4 2 5 3 6 3 7 2 5 & $5 $\\\\\n0 3 4 5 1 2 4 6 2 7 4 5 & $-5$\n\\end{tabular}\n\\hskip 10pt\n\\begin{tabular}{l|r|}\n0 3 1 4 2 5 2 6 3 7 2 5 & $5 $\\\\\n0 3 4 5 1 2 4 6 4 7 2 5 & $-5 $\\\\\n0 3 1 4 2 5 2 6 2 7 3 5 & $5 $\\\\\n0 3 1 4 5 6 2 6 3 7 2 3 & $-5 $\\\\\n0 3 4 5 2 6 4 7 2 7 1 2 & $-5 $\\\\\n0 3 1 4 5 6 2 3 2 7 3 4 & $-5 $\\\\\n0 3 4 5 2 6 6 7 1 2 2 3 & $-5 $\\\\\n0 3 1 4 5 6 2 3 3 7 2 4 & $-5 $\\\\\n0 3 4 5 2 6 2 7 1 2 3 6 & $5 $\\\\\n0 3 1 4 2 5 3 6 2 7 3 5 & $-5 $\\\\\n0 3 4 5 1 2 2 6 4 7 2 5 & $5 $\\\\\n0 3 1 4 2 5 3 6 3 7 2 5 & $-5 $\\\\\n0 3 4 5 1 2 2 6 2 7 4 5 & $5 $\\\\\n0 3 4 5 5 6 6 7 1 2 2 6 & $-5 $\\\\\n0 3 1 4 5 6 2 6 2 7 2 3 & $5 $\\\\\n0 1 2 4 2 5 2 6 2 7 3 4 & $-5 $\\\\\n0 1 2 4 2 5 2 6 3 7 2 5 & $-5 $\\\\\n0 1 2 4 2 5 2 6 2 7 3 5 & $-5 $\\\\\n0 3 4 5 2 6 6 7 1 2 4 6 & $5 $\\\\\n0 3 1 4 5 6 2 3 2 7 2 5 & $-5 $\\\\\n0 3 4 5 1 2 4 6 4 7 2 3 & $-5 $\\\\\n0 3 1 4 2 5 2 6 2 7 3 4 & $5 $\\\\\n\\multicolumn{2}{c|}{(\\textit{see next page})}\n\\end{tabular}}\n\n}\n\n\\twocolumn\n\\begin{minipage}{\\textwidth}\n{\\tiny\\centerline\n\\begin{tabular}{l|r|}\n0 3 4 5 1 2 2 6 4 7 3 4 & $-5 $\\\\\n0 3 1 4 2 5 3 6 2 7 2 4 & $-5 $\\\\\n0 3 1 4 5 6 2 3 3 7 2 5 & $-5 $\\\\\n0 3 4 5 2 6 2 7 1 2 4 6 & $5 $\\\\\n0 3 1 4 5 6 2 7 3 7 2 3 & $-5 $\\\\\n0 3 4 5 2 6 6 7 2 7 1 2 & $-5 $\\\\\n0 3 1 4 2 5 3 6 3 7 2 4 & $-5 $\\\\\n0 3 4 5 1 2 2 6 2 7 3 4 & $-5 $\\\\\n0 3 1 4 2 5 2 6 3 7 3 4 & $5 $\\\\\n0 3 4 5 1 2 4 6 2 7 2 3 & $-5 $\\\\\n0 3 4 5 1 6 2 7 5 7 2 4 & $-5 $\\\\\n0 3 4 5 2 6 4 6 1 7 2 5 & $-5 $\\\\\n0 3 4 5 1 6 2 7 2 5 4 6 & $5 $\\\\\n0 3 4 5 1 6 4 7 2 5 2 3 & $-5 $\\\\\n0 3 4 5 1 6 2 6 2 7 4 5 & $5 $\\\\\n0 3 4 5 1 6 2 7 2 7 3 4 & $5 $\\\\\n0 3 4 5 2 6 6 7 1 7 2 3 & $-5 $\\\\\n0 3 4 5 1 5 6 7 2 3 2 4 & $5 $\\\\\n0 3 4 5 2 6 4 6 1 7 2 3 & $-5$\n\\end{tabular}\n\\hskip 10pt\n\\begin{tabular}{l|r|}\n0 3 4 5 1 5 6 7 2 4 2 6 & $5 $\\\\\n0 3 4 5 2 6 2 7 1 5 3 6 & $5 $\\\\\n0 3 4 5 1 6 2 6 3 7 2 4 & $5 $\\\\\n0 3 4 5 2 6 2 6 1 7 3 4 & $-5 $\\\\\n0 3 4 5 2 6 4 7 1 5 2 6 & $-5 $\\\\\n0 3 4 5 1 6 2 7 2 5 3 4 & $5 $\\\\\n0 3 4 5 1 6 4 7 2 5 2 6 & $5 $\\\\\n0 3 4 5 1 6 4 7 2 7 2 3 & $-5 $\\\\\n0 3 4 5 1 6 4 6 2 7 2 5 & $5 $\\\\\n0 3 4 5 1 6 2 7 3 5 2 4 & $-5 $\\\\\n0 3 4 5 2 5 6 7 1 4 2 6 & $-5 $\\\\\n0 3 4 5 2 6 4 7 2 7 1 3 & $-5 $\\\\\n0 3 4 5 2 5 6 7 1 3 2 6 & $-5 $\\\\\n0 3 4 5 2 6 6 7 1 7 2 4 & $5 $\\\\\n0 3 4 5 1 6 2 4 5 7 2 3 & $5 $\\\\\n0 3 4 5 2 6 6 7 2 7 1 4 & $-5 $\\\\\n0 3 4 5 1 6 2 4 3 7 2 5 & $5 $\\\\\n0 3 4 5 2 6 2 7 1 3 4 6 & $5 $\\\\\n0 3 4 5 2 6 6 7 1 3 2 4 & $-5$\n\\end{tabular}\n\\hskip 10pt\n\\begin{tabular}{l|r|}\n0 3 4 5 1 6 2 7 2 3 4 6 & $-5 $\\\\\n0 3 4 5 1 5 2 6 4 7 2 3 & $5 $\\\\\n0 3 4 5 1 5 2 6 2 7 3 4 & $-5 $\\\\\n0 3 4 5 1 6 4 7 2 3 2 6 & $-5 $\\\\\n0 3 4 5 1 6 2 4 2 7 4 5 & $-5 $\\\\\n0 3 4 5 1 6 2 7 2 7 2 4 & $-5 $\\\\\n0 3 4 5 1 6 2 4 5 7 2 4 & $5 $\\\\\n0 3 4 5 2 6 2 6 1 7 2 4 & $-5 $\\\\\n0 3 4 5 1 5 2 6 4 7 2 4 & $5 $\\\\\n0 3 4 5 1 6 2 7 2 3 2 4 & $5 $\\\\\n0 3 4 5 1 6 2 4 2 7 3 4 & $5 $\\\\\n0 3 4 5 1 6 2 6 2 7 2 4 & $-5 $\\\\\n0 3 4 5 1 6 2 4 3 7 2 4 & $5 $\\\\\n0 3 4 5 2 6 2 7 1 5 2 6 & $5 $\\\\\n0 3 4 5 2 6 6 7 1 3 2 6 & $-5 $\\\\\n0 3 4 5 2 6 2 7 1 3 2 6 & $5 $\\\\\n0 3 4 5 1 6 4 7 2 3 2 4 & $-5 $\\\\\n0 3 4 5 1 5 2 6 2 7 2 4 & $-5 $\\\\\n0 3 4 5 1 6 4 7 2 7 2 4 & $5$\n\\end{tabular}\n\\hskip 10pt\n\\begin{tabular}{l|r|}\n0 3 4 5 1 6 2 4 2 7 2 5 & $5 $\\\\\n0 3 4 5 1 6 4 6 2 7 2 4 & $5 $\\\\\n0 3 4 5 1 6 2 4 2 7 2 3 & $5 $\\\\\n0 3 4 5 2 6 4 7 5 7 1 2 & $5 $\\\\\n0 3 1 4 5 6 2 6 3 7 2 5 & $5 
$\\\\\n0 3 4 5 2 5 6 7 1 2 4 6 & $-5 $\\\\\n0 3 1 4 5 6 2 7 3 5 2 6 & $5 $\\\\\n0 3 4 5 2 5 6 7 1 2 3 6 & $-5 $\\\\\n0 3 1 4 5 6 2 7 3 5 2 4 & $5 $\\\\\n0 3 4 5 2 6 6 7 3 7 1 2 & $5 $\\\\\n0 3 1 4 5 6 2 7 3 7 2 4 & $5 $\\\\\n0 3 4 5 5 6 6 7 1 2 2 3 & $5 $\\\\\n0 3 1 4 5 6 2 6 2 7 3 4 & $5 $\\\\\n0 3 4 5 1 2 2 6 4 7 2 4 & $-5 $\\\\\n0 3 1 4 2 5 3 6 2 7 2 3 & $-5 $\\\\\n0 3 4 5 2 6 6 7 1 2 2 6 & $5 $\\\\\n0 3 1 4 5 6 2 3 2 7 2 3 & $-5 $\\\\\n0 3 4 5 1 2 4 6 2 7 2 4 & $-5 $\\\\\n0 3 1 4 2 5 2 6 3 7 2 3 & $-5$\n\\end{tabular}}\n\n}\n\n\\smallskip\n\\begin{rem}\nTo establish\nthe\nformula for\nthe morphism ${\\rm O\\mathaccent \"017E\\relax {r}}$ that\nwould be universal with respect to\nall cocycles $\\gamma \\in \\ker \\Id$, we are accumulating a sufficient number of pairs ($\\Id$-\\\/cocycle $\\gamma$, $\\partial_\\mathcal{P}$-\\\/cocycle $\\mathcal{Q}$), in which $\\mathcal{Q}$ is built exactly\nfrom graphs that one obtains from orienting the graphs in~$\\gamma$.\nLet us remember that not only nontrivial cocycles (e.g., $\\boldsymbol{\\gamma}_3$,\\ $\\boldsymbol{\\gamma}_5$,\\ or $\\boldsymbol{\\gamma}_7$ from~\\cite{JNMP17}, cf.~\\cite{DolgushevRogersWillwacher,WillwacherGRT})\nbut also $\\Id$-\\\/trivial, like $\\delta_6$ on p.~\\pageref{ExDifferential}, or even the `zero' non\\\/-\\\/oriented graphs are\nsuited for this purpose:\ne.g., a unique ${\\rm O\\mathaccent \"017E\\relax {r}}(w_4)(\\mathcal{P})\\equiv 0$ constrains~${\\rm O\\mathaccent \"017E\\relax {r}}$.\nIn every such case, the respective $\\partial_\\mathcal{P}$-\\\/cocycle is obtained\\footnote\nThe actually found $\\partial_\\mathcal{P}$-\\\/cocycle $\\mathcal{Q}$ might differ from the value ${\\rm O\\mathaccent \"017E\\relax {r}}(\\gamma)$ by $\\partial_\\mathcal{P}$-\\\/trivial or improper terms,\ni.e.\\\n$\\mathcal{Q} = {\\rm O\\mathaccent \"017E\\relax {r}}(\\gamma) + \\partial_\\mathcal{P}({\\EuScript X}} %{\\mathcal{X}) + \\nabla(\\mathcal{P},\\schouten{\\mathcal{P},\\mathcal{P}}) $\nfor some vector field ${\\EuScript X}} %{\\mathcal{X}$ realized by Kontsevich graphs and for some ``Leibniz'' bi\\\/-\\\/vector graphs\n$\\nabla$\nvanishing identically at every Poisson structure~$\\mathcal{P}$.}\nby\nsolving the factorization problem $\\schouten{\\mathcal{P},\\mathcal{Q}(\\mathcal{P})} \\doteq 0$ via $\\schouten{\\mathcal{P},\\mathcal{P}} = 0$.\nThe formula\nof the orientation morphism ${\\rm O\\mathaccent \"017E\\relax {r}}$ will be the object of another paper.\n\\end{rem} \n\n\n\n{\\small\n\\noindent\\textbf{Acknowledgements.}\nThe authors thank M.~Kontsevich and T.~Willwacher for recalling the existence of the orientation morphism~${\\rm O\\mathaccent \"017E\\relax {r}}$.\nA.V.K.\\ thanks the organizers of international workshop SQS'17 (July~31 -- August~5, 2017 at JINR Dubna, Russia) for\ndiscussions.%\n\\footnote{As soon as the expression of $167$ Kontsevich\ngraph coefficients in $\\mathcal{Q}_5$ via\nthe $91$\ninteger parameters was obtained,\nthe linear system\nin factorization $\\schouten{\\mathcal{P}, \\mathcal{Q}_5(\\mathcal{P})} = \\Diamond(\\mathcal{P}, \\schouten{\\mathcal{P},\\mathcal{P}})$ for the\n\npentagon\\\/-\\\/wheel flow $\\dot{\\mathcal{P}} = \\mathcal{Q}_5(\\mathcal{P})$ was solved\nindependently by A.\nSteel (Sydney) using the Markowitz pivoting run in \\textsc{Magma}.\nThe flow components $\\mathcal{Q}_5$ of all the known solutions $(\\mathcal{Q}_5, \\Diamond_5)$ match\nidentically.\n(For the flow $\\dot{\\mathcal{P}} = \\mathcal{Q}_5(\\mathcal{P}) = {\\rm O\\mathaccent \"017E\\relax 
{r}}(\\boldsymbol{\\gamma}_5)(\\mathcal{P})$, uniqueness is not claimed for the operator $\\Diamond$ in the r.-h.s.\\ of the factorization.)%\n\n}\n\n}\n\n\n\\end{minipage}%\n]\n{\\footnotesize","meta":{"arxiv_id":"1712.05259","language":"en","source":"arxiv","timestamp":1513328930000,"url":"https:\/\/arxiv.org\/abs\/1712.05259","yymm":"1712"}} {"text":"\\section{Introduction}\n Task-oriented dialogue, different from chit-chat type of conversation, requires the system to produce responses by accessing information from knowledge bases and planning over multiple dialogue turns. Conventional task-oriented dialogue systems have a complex pipeline \\cite{raux2005let,young2013pomdp} consisting of independently developed and modularly connected components for natural language understanding (NLU) \\cite{mesnil2015using,Liu2016}, dialogue state tracking (DST) \\cite{henderson2014word,mrkvsic2016neural}, and dialogue policy \\cite{gasic2014gaussian,su2016line}. A limitation with such pipelined design is that errors made in upper stream modules may propagate to downstream components, making it hard to identify and track the source of errors. Moreover, each component in the pipeline is ideally re-trained as preceding components are updated, so that we have inputs similar to the training examples at run-time. This domino effect causes several issues in practice.\n \n To ameliorate these limitations with the conventional pipeline dialogue systems, recent efforts have been made in designing neural network based end-to-end learning solutions. Such end-to-end systems aim to optimize directly towards final system objectives (e.g. response generation, task success rate) instead of performing component-wise optimization. Many of the recently proposed end-to-end models are trained in supervised manner \\cite{wenN2N16,bordes2017,eric2017copy,Liu2017} by learning from human-human or human-machine dialogue corpora. Deep reinforcement learning (RL) based systems \\cite{li2017end,liu2017iterative,williams2017hybrid,dhingra2017towards} that learns by interacting with human user or user simulator have also been studied in the literature. Comparing to supervised training models, systems trained with deep RL showed improved task success rate and model robustness towards diverse dialogue scenarios. \n \n In this work, we present a neural network based task-oriented dialogue system that can be optimized end-to-end with deep RL. The system is built with neural network components for natural language encoding, dialogue state tracking, and dialogue policy learning. Each system component takes in underlying component's outputs in a continuous from which is fully differentiable with respect to the system optimization target, and thus the entire system can be trained end-to-end. In the experiments on a movie booking domain, we show that our system trained with deep RL leads to significant improvement on dialogue task success rate comparing to supervised training systems. We further illustrate the benefit of performing end-to-end optimization comparing to only updating the policy network during online policy learning as in many previous work \\cite{gasic2014gaussian,su2016line}.\n \n\\section{Related Work}\n\tTraditional task-oriented dialogue systems typically require a large number of handcrafted features, making it hard to extend a system to new application domains. 
Recent approaches to task-oriented dialogue treat the task as a partially observable Markov Decision Process (POMDP) \\cite{young2013pomdp} and use RL for online policy optimization by interacting with users \\cite{gavsic2013line}. The dialogue state and action space have to be carefully designed in order to make the reinforcement policy learning tractable \\cite{young2013pomdp}.\n \n With the success of end-to-end trainable neural network models in modeling non-task-oriented chit-chat dialogues \\cite{serban2015building,li2016deep}, efforts have been made in carrying over the good performance of end-to-end models to task-oriented dialogues. Bordes and Weston \\cite{bordes2017} proposed modeling task-oriented dialogues with a machine reading approach using end-to-end memory networks. Their model removes the dialogue state tracking module and selects the final system response directly from candidate responses. Comparing to this approach, our model explicitly tracks user's goal in dialogue state over the sequence of turns, as robust dialogue state tracking has been shown \\cite{jurvcivcek2012reinforcement,dhingra2017towards} to be useful for interfacing with a knowledge base (KB) and improving task success rate. Wen et al.~\\cite{wenN2N16} proposed an end-to-end trainable neural network model with modularly connected system components. This system is trained in a supervised manner, and thus may not be robust enough to handle diverse dialogue situations due to the limited varieties in the training dialogue samples. Our system is trained by a combination of SL and deep RL methods, as it is shown that RL training may effectively improve the system robustness and dialogue success rate \\cite{li2017end,williams2017hybrid}. Dhingra et al.~\\cite{dhingra2017towards} proposed an end-to-end RL dialogue agent for information access. Their model focuses on bringing differentiability to the KB query operation by introducing a \"soft\" retrieval process in selecting the KB entries. Such soft-KB lookup may be prone to information updates in the KB, which is common in real world information systems. In our model, we use symbolic query and leave the selection of KB entities to external services (e.g. a recommender system), as entity ranking in real world systems can be made with much richer feature sets (e.g. user profiles, location and time context, etc.). Quality of the generated query is directly related to the performance of our dialog state tracking module, which can be optimized during user interactions in the proposed end-to-end reinforcement learning model.\n\n\\section{Proposed Method}\n\\subsection{System Architecture}\n\\label{sec:method}\n\tFigure \\ref{fig:e2e_dialogue_nn} shows the overall system architecture of the proposed end-to-end task-oriented dialogue model. A continuous form dialogue state over a sequence of turns is maintained in the state $s_k$ of a dialogue-level LSTM. 
At each dialogue turn $k$, this dialogue-level LSTM takes in the encoding of the user utterance $U_k$ and the encoding of the previous system action $A_{k-1}$, and produces a probability distribution $P(l^{m}_k)$ over candidate values for each of the tracked goal slots:\n \\begin{align}\n & s_k = \\operatorname{LSTM}(s_{k-1}, \\hspace{1mm} [U_k, \\hspace{1mm} A_{k-1}]) \\\\\n & P(l^{m}_k \\hspace{1mm} | \\hspace{1mm} \\mathbf{U}_{\\le k}, \\hspace{1mm} \\mathbf{A}_{< k}) = \\operatorname{SlotDist}_{m}(s_k)\n \\end{align}\n where $\\operatorname{SlotDist}_{m}$ is a single hidden layer MLP with $\\operatorname{softmax}$ activation function over slot type $m \\in M$. In encoding natural language user utterance to a continuous vector $U_k$, we use a bidirectional LSTM (i.e. an utterance-level LSTM) reader by concatenating the last forward and backward LSTM states. \n\n\\begin{figure*}[t]\n \\centering\n \\includegraphics[width=220pt]{e2e_dialogue_nn.pdf}\n \\vspace*{-2ex}\n \\caption{{ Proposed end-to-end task-oriented dialogue model architecture. }}\n \\label{fig:e2e_dialogue_nn}\n \\vspace*{-2ex}\n\\end{figure*}\n\n Based on slot-value pair outputs from dialogue state tracking, a query command is formulated by filling a query template with candidate values that have the highest probability for each tracked goal slot. Alternatively, an n-best list of queries can be generated with the most probable candidate values. The query is sent to a KB to retrieve user requested information. Finally, a system action is emitted in response to the user's input based on the current dialogue state and the information retrieved from the knowledge base: \n \\begin{align}\n P(a_{k} \\hspace{1mm} | \\hspace{1mm} U_{\\le k}, \\hspace{1mm} A_{< k}, \\hspace{1mm} E_{\\le k}) = \\operatorname{PolicyNet}(s_{k}, v_{k}, E_{k})\n \\end{align}\n where $v_{k}$ represents the concatenated log probabilities of candidate values for each goal slot. $E_{k}$ is the encoding of the retrieved result from the knowledge base (e.g. item availability and number of matched items). $\\operatorname{PolicyNet}$ is an MLP with $\\operatorname{softmax}$ activation function over all system actions. The emitted system action is then translated to a system response in natural language format by combining the state tracking outputs and the query results. We use a template based natural language generator (NLG) in this work.\n\n\\subsection{Model Training}\n We first train the system in a supervised manner using task-oriented dialogue corpora. Based on system inputs with past user utterances, system actions, and KB results, the model tracks the user's goal slot values and predict the next system action. We optimize the model to minimize the linear interpolation of cross-entropy losses for dialogue state tracking and system action prediction: \n\t\\begin{equation}\n \\begin{split}\n \\min_{\\theta} \\sum_{k=1}^{K} -\\Big[ \\sum_{m=1}^{M} &\\lambda _{l^{m}} \\log P({l^{m}_k}^{*} | \\mathbf{U}_{\\le k}, \\mathbf{A}_{< k}, \\mathbf{E}_{< k}; \\theta) \\\\\n + &\\lambda _a \\log P(a_k^{*} | \\mathbf{U}_{\\le k}, \\mathbf{A}_{< k}, \\mathbf{E}_{\\le k}; \\theta) \\hspace{1mm} \\Big] \\\\\n \\end{split}\n \\end{equation}\n where $\\lambda$s are the linear interpolation weights for the cost of each system output. 
${l^{m}_k}^{*}$ and $a_k^{*}$ are the ground truth labels for goal slots and system action the $k$th turn.\n\n After the supervised training stage, we further optimize the system with RL by letting the agent to interact with users and collecting user feedback. \n We apply REINFORCE algorithm \\cite{williams1992simple} in optimizing the network parameters. We use softmax policy during RL training to encourage the agent to explore the dialogue action space. Feedback is only collected at the end of a dialogue. A positive reward is assigned for success tasks, and a zero reward is assigned for failure tasks. A small step penalty is applied to each dialogue turn to encourage the agent to complete the task in fewer steps. We use policy gradient method for dialogue policy learning. With likelihood ratio gradient estimator, the gradient of the objective function $J_k(\\theta)$ can be derived as:\n \\begin{equation}\n \\begin{split}\n \\nabla _{\\theta} J_k(\\theta) = \\nabla _{\\theta} \\mathbb E_{\\theta}\\left[ R_k \\right]\n \n = \\mathbb E_{\\theta_a}\\left[ \\nabla _{\\theta} \\log \\pi _{\\theta}(a_{k} | s_{k}) R_{k} \\right]\n \\end{split}\n \\end{equation}\n This last expression above gives us an unbiased gradient estimator. We sample the agent action based on the currently learned policy at each dialogue turn and compute the gradient. \n \n\\section{Experiments}\n\\subsection{Datasets}\n We evaluate the proposed method on DSTC2 \\cite{henderson2014second} dataset in restaurant search domain and an internally collected dialogue corpus in movie booking domain. The movie booking corpus is generated with rule based dialogue agent and user simulator. The same user simulator is used to interact with our end-to-end learning agent during RL training. We use an extended set of NLG templates during model testing to evaluate the end-to-end model's generalization capability in handling diverse natural language inputs.\n \n\\subsection{Training Settings}\n We set state size of the dialogue-level and utterance-level LSTM as 200 and 150 respectively. Hidden layer size of the policy network is set as 100. We used randomly initialized word embedding of size 300. Adam optimization method \\cite{kingma2014adam} with initial learning rate of 1e-3 is used for mini-batch training. Dropout rate of 0.5 is applied during training to prevent the model from over-fitting. \n \n In dialogue simulation, we take a task-oriented dialogue as successful if the goal slot values estimated by the state tracker fully match to the user's true goal values, and the system is able to offer an entity which is finally accepted by the user. Maximum allowed number of dialogue turn is set as 15. A positive reward of +15.0 is given to the agent at the end of a success dialogue, and a zero reward is given in a failure case. We apply a step penalty of -1.0 for each turn to encourage shorter dialogue in completing the task. \n \n\\subsection{Results and Analysis}\n\tTable \\ref{tab:table_dstc2_sl} and Table \\ref{tab:table_movie_corpus_sl} show the supervised training model performance on DSTC2 and the movie booking dialogue dataset. The model is evaluated on dialogue state tracking accuracy. On DSTC2 dataset, our end-to-end model achieves near-state-of-the-art state tracking performance comparing to the recent published results using RNN \\cite{henderson2014robust} and NBT \\cite{mrkvsic2016neural}. 
On the movie booking dataset, our model also achieves promising performance on individual slot tracking and joint slot tracking accuracy.\n\n \\begin{table}[th]\n \\caption{Belief tracking results on DSTC2 corpus (with ASR hypothesis as input)}\n \\label{tab:table_dstc2_sl}\n \\centering\n \\begin{tabular}{l c c c c}\n \\hline \n \\textbf{Model} & \\textbf{Area} & \\textbf{Food} & \\textbf{Price} & \\textbf{Joint} \\\\\n \\hline\n RNN \\cite{henderson2014robust} & 92 & 86 & 86 & 69 \\\\\n \n NBT \\cite{mrkvsic2016neural} & 90 & 84 & 94 & 72 \\\\\n Our end-to-end model & 90 & 84 & 92 & 72 \\\\\n \\hline\n \\end{tabular} \n \\end{table}\n\n \\begin{table}[th]\n \\caption{Belief tracking results on movie booking dataset}\n \\label{tab:table_movie_corpus_sl}\n \\centering\n \\begin{tabular}{l c c c c c c}\n \\hline \n \\textbf{Model} & \\textbf{Num\\_ticket} & \\textbf{Movie} & \\textbf{Theater} & \\textbf{Date} & \\textbf{Time} & \\textbf{Joint} \\\\\n \\hline \n Our end-to-end model & 98.22 & 91.86 & 97.33 & 99.31 & 97.71 & 84.57 \\\\\n \\hline\n \\vspace*{-1ex}\n \\end{tabular} \n \\end{table}\n\n\tFigure \\ref{fig:e2e_training_curves} shows the RL curves of the proposed model on dialogue task success rate and average dialogue turn size. Evaluation is based on dialogue simulations between our proposed end-to-end dialogue agent and the rule based user simulator. This is different from the evaluations based on fixed dialogue corpora as in Table \\ref{tab:table_dstc2_sl} and \\ref{tab:table_movie_corpus_sl}. The policy gradient based RL training is performed on top of the supervised training model. We compare models with two RL training settings, the end-to-end training and the policy-only training, to the baseline supervised learning (SL) model. \n \n As shown in Figure \\ref{fig:e2e_training_curves}(a), the SL model performs poorly during user interaction, indicating the limited generalization capability of the SL model to unseen dialogue state. Any mistake made by the agent during user interaction may lead to deviation of the dialogue from the training dialogue trajectories and states. The SL agent does not know how to recover from an unknown state, which leads to final task failure. RL model training, under both end-to-end learning and policy-only learning settings, continuously improves the task success rate with the growing number of user interactions. We see clear advantage of performing end-to-end model update in achieving higher dialogue task success rate comparing to only updating the policy network during interactive learning. \n \n Figure \\ref{fig:e2e_training_curves}(b) shows the learning curves for the average number of turns in successful dialogues. We observe decreasing number of dialogue turns along the growing number of interactive learning episodes. This shows that the dialogue agent learns better strategies to successfully complete the task in fewer numbers of turns. Similar to the results for task success rate, the end-to-end training model outperforms the model with policy-only optimization during RL training, achieving lower average number of dialogue turns in successfully completing a task. 
\n \\begin{figure}[t]\n \\centering\n \\includegraphics[width=\\linewidth]{e2e_training_curves_short.pdf}\n \\vspace*{-4ex}\n \\caption{RL curves on (a) dialogue task success rate and (b) average dialogue turn size.}\n \\label{fig:e2e_training_curves}\n \\end{figure}\n\n\\subsection{Human Evaluations}\n We further evaluate our proposed method with human judges recruited via Amazon Mechanical Turk. Each judge is asked to read a dialogue between our model and the user simulator and rate each system turn on a scale of 1 (frustrating) to 5 (optimal way to help the user). Each turn is rated by 3 different judges. We rate the three models with 100 dialogues each: (i) the SL model, (ii) SL with policy-only RL model, and (iii) SL with end-to-end RL model. Table \\ref{tab:eval_result_human} lists the mean and standard deviation of human evaluation scores over all system turns: end-to-end optimization with RL clearly improves the quality of the model according to human judges.\n\n \\begin{table}[th]\n \\caption{Human evaluation results with mean and standard deviation of crowd worker scores.}\n \\label{tab:eval_result_human}\n \\centering\n \\begin{tabular}{l|c|c|c}\n \\hline\n \\textbf{Model} & SL & SL + policy-only RL & SL + end-to-end RL \\\\ \\hline\n \\textbf{Score} & 3.987 $\\pm$ 0.086 & 4.261 $\\pm$ 0.089 & 4.394 $\\pm$ 0.087 \\\\ \\hline\n \\end{tabular}\n \\end{table}\n \n \\vspace*{-0.5ex}\n\n\\section{Conclusions}\n\tIn this work, we propose a neural network based task-oriented dialogue system that can be trained end-to-end with supervised learning and deep reinforcement learning. We first bootstrap a dialogue agent with supervised training by learning directly from task-oriented dialogue corpora, and further optimize it with deep RL during its interaction with users. We show in the experiments that deep RL optimization on top of the supervised training model leads to significant improvement on task success rate and reduction in dialogue length comparing to supervised training baseline model. The simulation and human evaluation results further illustrate benefits of performing end-to-end model training with deep RL comparing to component-wise optimization. \n\n","meta":{"arxiv_id":"1711.10712","language":"en","source":"arxiv","timestamp":1512378205000,"url":"https:\/\/arxiv.org\/abs\/1711.10712","yymm":"1711"}} -{"text":"\\section{Introduction}\n\nOver the last decades the properties of hadrons in nuclei have been matter of intense investigation. On the one hand, it is believed that hadrons in nuclei is an excellent scenario to test certain symmetries of the \ntheory of the strong interaction, Quantum Chromodynamics (QCD), such as the chiral symmetry in the low-energy regime or heavy-quark symmetries as hadrons with charm or beauty content are produced in the laboratory. On the other hand, it is of crucial importance to understand the excitation mechanisms in the nucleus as well as the nature of certain excited hadronic states, whose structure could be studied in a hot and\/or dense nuclear medium.\n\nIn order to address hadrons in nuclei one can resort to theoretical and\/or experimental analyses \\cite{Metag:2017yuh,Rapp:2011zz}. From the theoretical side, there is an extensive variety of models that aim at understanding the properties of hadrons in nuclei, ranging from relativistic-mean field models (RMF), Nambu-Jona-Lasinio schemes (NJL), quark-meson coupling models (QMC), QCD sum-rule studies to unitarized approaches based on effective theories or meson-exchange models. 
Experimentally, photon-, electron-, neutrino- and hadron-induced reactions as well as heavy-ion collisions (HiCs) offer a gateway to the properties of hadrons in nuclei. The measurement of transparency ratios is extremely useful for the analysis of the imaginary part of the hadron-nucleon interaction in matter, whereas the excitation functions and the meson-momentum distributions are of fundamental importance to understand the real part of the interaction \\cite{Metag:2017yuh}. Moreover, it is crucial to connect the theoretical predictions to the experimental results, using transport model calculations or collision models based on nuclear spectral functions, to fully understand the dynamics of hadrons in nuclei.\n\nIn this paper we concentrate on the analysis of the properties of mesons with strangeness and charm content that interact with nuclei. Nowadays, strange and charmed hadrons are being produced in nuclear and particle facilities, such as GSI, CERN or RHIC, while they are the subject of future experimental programs, such as FAIR, NICA or J-PARC.\n\n\\section{Strangeness in nuclei}\n\nStrangeness in nuclei has received a lot attention in connection with the study of neutron stars interior \\cite{Watts:2016uzu}, the properties of exotic atoms \\cite{Friedman:2016rfd}, and strangeness production in heavy-ion collisions (HICs) \\cite{Hartnack:2011cn}. In particular, the dynamics of strange mesons, such as $\\bar K$, in vacuum and in the nuclear medium is still a challenge for theory and experiments. In this section the $\\bar K N$ interaction is studied, paying a special attention to the role of the $\\Lambda(1405)$ and the formation of bound states, such as $\\bar KNN$. Moreover, the production and propagation of strangeness in heavy-ion collisions (HICs) is investigated, in view of the present and forthcoming experimental programs on strangeness.\n\n\\subsection{$\\bar K N$ interaction: the $\\Lambda(1405)$}\n\nThe $\\bar K N$ scattering in the $I=0$ channel is governed by the presence of the $\\Lambda(1405)$, located only 27 MeV below the $\\bar K N$ threshold. The dynamical origin of the $\\Lambda(1405)$ dates back more than 50 years ago to the work of Dalitz and Tuan \\cite{Dalitz:1959dn}. Recently, it has been revisited by means of unitarized theories using meson-exchange models \\cite{MuellerGroeling:1990cw,Haidenbauer:2010ch} or chiral Lagrangians \\cite{Kaiser:1995eg, Oset:1997it, Oller:2000fj,Lutz:2001yb,GarciaRecio:2002td,Borasoy:2005ie,Oller:2006jw}, these latter ones analyzing the effects of including a complete basis of meson-baryon channels, studying the differences in the regularization of the equations, including s- and u-channel Born terms in the Lagrangian, implementing next-to-leading (NLO) contributions,... . All these recent efforts have culminated in establishing the $\\Lambda(1405)$ as a superposition of two poles of the scattering matrix \\cite{Jido:2003cb}, that are generated dynamically from the unitarized coupled-channel scheme.\n\nA renewed interest in the $\\bar K N$ interaction has been developed in the past years after the availability of a more precise measurement of the energy shift and width of the $1s$ state in kaonic hydrogen by the SIDDHARTA Collaboration at DA$\\Phi$NE \\cite{Bazzi:2011zj}, that has helped to clarify the discrepancies between the KEK \\cite{Iwasaki:1997wf,Ito:1998yi} and the DEAR \\cite{Beer:2005qi,Cargnelli:2005cf} measurements. 
The obtained value of the energy shift is $\\Delta E = 283 \\pm 36 \\pm 6$ eV with a width of $\\Gamma=541 \\pm 89 \\pm 22$ eV, in good agreement with KEK results. Furthermore, the SIDDHARTA measurement has provided new constraints on the theoretical predictions reported in \\cite{Ikeda:2011pi,Guo:2012vv,Mai:2012dt,Feijoo:2015yja}. \n\n\n\\begin{figure}[t]\n\\includegraphics[width=.5\\textwidth]{nagae.eps}\n\\includegraphics[width=.5\\textwidth]{Kbar.eps}\n\\caption{Left: Comparison of the binding energy and width of the $K^-pp$ between experiments and theoretical predictions, taken from \\cite{Nagae:2016cbm}. Right: $\\bar K$ spectral function for different densities, temperatures and momenta, taken from \\cite{Tolos:2008di}.}\n\\label{fig1}\n\\end{figure}\n\n\\subsection{$\\bar K NN$ state} \n\nThe dynamical generation of the two-pole structure of the $\\Lambda(1405)$ indicates that the $\\bar K N$ interaction might be attractive enough to produce bound states. Indeed, it has been suggested that $\\bar K$-nuclear clusters may form, such as the $\\bar K NN$ in isospin $I=1\/2$. The $I=1\/2$ $\\bar K NN$ state has been extensively studied, both theoretically and experimentally, as shown in the left-hand side of {\\bf Fig.~\\ref{fig1}} (see Ref.~\\cite{Nagae:2016cbm} and references therein). This state was initially seen by the FINUDA \\cite{Agnello:2005qj}, DISTO \\cite{Yamazaki:2010mu} and OBELIX \\cite{Bendiscioli:2009zz} Collaborations, but could be explained by means of conventional processes \\cite{Ramos:2008zza} or not be reproduced \\cite{Agakishiev:2014dha}. Experiments performed by the Spring8\/LEPS \\cite{Tokiyasu:2013mwa}, J-PARC E15 \\cite{Hashimoto:2014cri}, AMADEUS \\cite{Doce:2015ust} Collaborations do not find any state, or, if found \\cite{Nagae:2016cbm,Ichikawa:2014ydh}, may have other interpretation, such as as a possible $I=3\/2$, $J^{\\pi}=2^+$ resonance near the $\\pi \\Sigma N$ threshold \\cite{Garcilazo:2012rh}. More recently, the J-PARC E15 experiment has found a structure near the $\\bar K NN$ threshold \\cite{Sada:2016nkb}, that has been interpreted as a $\\bar K NN$ bound state with a binding energy of $\\sim 20-40$ MeV \\cite{Sekihara:2016vyd}. \n\n\n\n\\subsection{$\\bar K N$ in matter}\n\n\nAntikaonic atoms \\cite{Friedman:2016rfd} give us information on the antikaon interaction with nucleons. The antikaon-nucleus potential has been extracted from best-fit analysis of antikaonic-atom data and some solutions agree with a very strongly attractive potential of the order of -200 MeV at normal saturation density $\\rho_0$. However, some criticism has been raised because the antikaonic-atom data only tests matter at the surface of the nucleus. Recent analysis on $K^- N$ scattering amplitudes from chiral SU(3) effective field theories supplemented with phenomenological terms for $K^-$ multinucleon interactions indicate that antikaonic atoms are insensitive to densities above $\\rho_0$ \\cite{Friedman:2016rfd}.\n\nEarly works based on relativistic mean-field models \\cite{Schaffner:1996kv} also obtained very deep potentials of a few hundreds of MeVs at $\\rho_0$. However, later approaches on unitarized theories in coupled channels based on the chiral effective theory \\cite{Lutz:1997wt,Ramos:1999ku} or on meson-exchange potentials \\cite{Tolos:2000fj,Tolos:2002ud} obtain a potential much less attractive. 
In fact, in the unitarized coupled-channel models, the attraction is a consequence of the modified $s$-wave $\\Lambda(1405)$ resonance in the medium due to Pauli blocking \\cite{Koch:1994mj}, together with the self-consistent inclusion of the $\\bar K$ self-energy \\cite{Lutz:1997wt}, and the implementation of self-energies of the mesons and baryons in the intermediate states \\cite{Ramos:1999ku}. As a result, the $\\bar K$ spectral function can be obtained, as shown in the right-hand side of Fig.~\\ref{fig1}. The $\\bar K$ spectral function shows that the $\\bar K$ in matter feels a slight attraction while acquiring a remarkable width. Moreover, the knowledge of higher partial waves beyond the $s$-wave \\cite{Tolos:2008di,Tolos:2006ny,Lutz:2007bh,Cabrera:2009qr,Cabrera:2014lca} becomes essential for analyzing the results of HICs at beam energies below 2 GeV per nucleon \\cite{Cassing:2003vz,Tolos:2003qj}. \n\n\\subsection{Strangeness production in HICs}\n\nThe production of $K$ and $\\bar K$ close to threshold has been extensively investigated in low-energy HICs by the KaoS \\cite{Forster:2007qk}, FOPI \\cite{Lopez:2010mb} and HADES Collaborations \\cite{Agakishiev:2014moo}. The analysis of experimental data together with microscopic transport approaches has permitted drawing several conclusions regarding the production mechanisms and the freeze-out conditions of strange mesons. However, the role of the in-medium properties of strange hadrons in their production and propagation in HICs is still an open question. Recent results from HADES and FOPI indicate that, while the $K^+$ shows a repulsive interaction in matter, the $\\Phi$ decay into $K^-$ washes out the effects of the $K^-$ potential in the spectra and flow \\cite{leifels}. Therefore, more systematic and higher-statistics data on $K^-$ production are necessary, while further information from elementary reactions is also required.\n\n\n\\section{Open charm in nuclei}\n\nThe interest in the properties of open and hidden charmed mesons was triggered in HICs by the possible charmonium suppression as a probe for the formation of the quark-gluon plasma (QGP). Nowadays, the nature of newly observed baryon and meson states with the charm degree of freedom is a matter of high interest in connection with many ongoing experiments, such as BESIII, BelleII, ALICE and LHCb, amongst others, as well as with planned facilities, e.g. FAIR, NICA and the J-PARC upgrade. The goal is to understand whether these states can be accommodated within the quark-model picture and\/or qualify better as being dynamically generated via hadron-hadron scattering processes. To this end, a large part of the experimental program in hadronic physics at PANDA (FAIR) will be devoted to charmonium spectroscopy. Also, the CBM (FAIR) experiment will extend the GSI program for in-medium modification of hadrons in the light-quark sector and provide the first insight into the charm-nucleus interaction. Indeed, the influence of medium modifications on charmonium production at finite baryon densities would affect the formation of the QGP phase of QCD at high densities.\n\n\\subsection{ $D N$ interaction: the $\\Lambda_c(2595)$}\n\nGiven the success of unitarized coupled-channel approaches in the description of some of the existing experimental data in the light-quark sector, the charm degree of freedom has recently been incorporated in these models and several experimental states have been described as dynamically generated baryon molecules (see Ref.~\\cite{Tolos:2013gta} and references therein). 
This is the case, for example, of the $\\Lambda_c(2595)$, which is the charmed counterpart of the $\\Lambda(1405)$. \n\nWhereas a separable potential for the bare meson-baryon interaction with no strange degree of freedom was assumed in \\cite{Tolos:2004yg}, later on, unitarized approaches were based on a bare meson-baryon interaction saturated with the $t$-channel exchange of vector mesons between pseudoscalar mesons and baryons in the zero-range approximation \\cite{Hofmann:2005sw,Mizutani:2006vq} or using the full $t$-dependence \\cite{JimenezTejero:2009vq}. Other approaches have made use of the J\\\"ulich meson-exchange model \\cite{Haidenbauer:2010ch,Haidenbauer:2007jq}, while some others have relied on the hidden gauge formalism \\cite{Wu:2010jy, Oset:2012ap}. More recent schemes incorporate heavy-quark symmetry constraints explicitly, such as those based on a pion-exchange model \\cite{Yamaguchi:2013ty,Hosaka:2016ypm} or on an extended Weinberg-Tomozawa interaction for four flavors that includes pseudoscalar and vector mesons together with $1\/2^+$ and $3\/2^+$ baryons \\cite{GarciaRecio:2008dp, Gamermann:2010zz, Romanets:2012hm,Garcia-Recio:2013gaa}. In all these unitarized coupled-channel models, the $\\Lambda_c(2595)$ is obtained dynamically, some of them \\cite{Haidenbauer:2010ch,Hofmann:2005sw,GarciaRecio:2008dp, Gamermann:2010zz, Romanets:2012hm,Garcia-Recio:2013gaa} even obtaining a double-pole structure, in a manner similar to that found for the $\\Lambda(1405)$.\n\n\\subsection{ $\\bar D N$ interaction}\n\nThe $C=-1$ sector has also been investigated within unitarized coupled-channel models, pion-exchange schemes with heavy-quark symmetry constraints or chiral quark models \\cite{Hofmann:2005sw,Haidenbauer:2007jq,Yamaguchi:2013ty,Hosaka:2016ypm,Gamermann:2010zz,Carames:2012bd}. Interestingly, some of the models find a $J=1\/2$ state close to the $\\bar DN$ threshold \\cite{Yamaguchi:2013ty,Hosaka:2016ypm,Gamermann:2010zz}. In Ref.~\\cite{Gamermann:2010zz} this state was generated by the $\\bar D N$ and $\\bar D^* N$ coupled-channel dynamics, and it appears to be a consequence of treating heavy pseudoscalar and heavy vector mesons on an equal footing, because no resonance would be generated unless the $\\bar D^* N$ channel is considered.\n\n\\subsection{ $D NN$ and $\\bar D NN$ states} \n\nGiven the fact that the $DN$ and $\\bar DN$ interactions are so attractive that they allow for the formation of bound states, the question arises whether $D$- or $\\bar D$-nuclear clusters may form.\nIn \\cite{Bayar:2012dd} an $I=1\/2$, $J=0^-$ $DNN$ state was found with mass $\\sim 3500$ MeV and width $\\sim 20-40$ MeV, being interpreted as a quasibound state of the $\\Lambda_c(2595)$ and a nucleon. Moreover, in \\cite{Yamaguchi:2013hsa} a state with $I=1\/2$, $J=0^-$ and 5.2 MeV binding was found together with an $I=1\/2$, $J=1^-$ state at 111.2 MeV above threshold.\n\n\\begin{figure}[t]\n\\begin{center}\n\\includegraphics[width=.3\\textwidth,angle =-90]{D0.eps}\n\\end{center}\n\\caption{$D^0$-nucleus bound states, taken from \\cite{GarciaRecio:2010vt}. 
}\n\\label{figd0}\n\\end{figure}\n\n\\begin{figure}[t]\n\\begin{center}\n\\includegraphics[width=.45\\textwidth]{Dmenos.eps}\n\\includegraphics[width=.45\\textwidth]{D0bar.eps}\n\\end{center}\n\\caption{$D^-$ and $\\bar D^0$-nucleus bound states, taken from \\cite{GarciaRecio:2011xt}.}\n\\label{figdm}\n\\end{figure}\n\n\\subsection{Open charm in matter}\n\nThe properties of open-charm mesons in matter have been the object of theoretical interest due to the consequences for charmonium suppression, as observed at SPS energies by the NA50 Collaboration. The change of the properties of $D$ mesons in matter would modify the $J\/\\Psi$ absorption in a hot and dense nuclear medium and can provide an explanation for $J\/\\Psi$ suppression. Furthermore, there have been speculations about the existence of $D$-meson bound states in nuclei \\cite{Tsushima:1998ru}. \n\nSeveral theoretical works have addressed the properties of open-charm mesons in dense baryonic matter: QMC schemes \\cite{Tsushima:1998ru}, QCD sum-rule approaches \\cite{Hayashigaki:2000es,Hilger:2011cq, Suzuki:2015est}, NJL models \\cite{Blaschke:2011yv}, chiral effective models in hot and dense matter \\cite{Mishra:2003se} or pion-exchange approaches with heavy-quark symmetry constraints \\cite{Yasui:2012rw}. The full spectral features (mass and width) of the open-charm mesons in dense nuclear matter have been obtained in self-consistent unitarized coupled-channel schemes, where the intermediate meson-baryon propagators contain different sources of density dependence \\cite{GarciaRecio:2011xt,Tolos:2004yg,Mizutani:2006vq,Tolos:2005ft,Lutz:2005vx,Tolos:2007vh,JimenezTejero:2011fc,Tolos:2009nn}.\n\n\n\\subsection{D-mesic nuclei}\nA possible experimental scenario for the detection of the changes in matter of the properties of open-charm mesons would be the formation of D-mesic nuclei, where a D-meson binds in nuclear orbits.\nIn fact, $D$ and $\\bar D$-meson bound states in $^{208}$Pb were predicted in Ref.~\\cite{Tsushima:1998ru}, relying upon an attractive $D$ and $\\bar D$-meson potential in the nuclear medium, obtained within a QMC model. \n\nWithin the unitarized coupled-channel model of Ref.~\\cite{GarciaRecio:2010vt}, it is found that $D^0$-nucleus states are weakly bound (see Fig.~\\ref{figd0}), in contrast to previous results using the QMC model \\cite{Tsushima:1998ru}, while having significant widths. The best chances for observation of bound states are in the region of $^{24}\\mbox{Mg}$, provided an orbital-angular-momentum separation can be performed: there is only one bound $s$-wave state, and its half width is about a factor of two smaller than the binding energy (see Fig.~\\ref{figd0}). With regard to $D^+$-nuclear states, the Coulomb interaction prevents the formation of observable bound states. As for $\\bar D$-mesic nuclei, not only $D^-$ but also $\\bar{D}^0$ bind in nuclei, as seen in Fig.~\\ref{figdm}. The spectrum contains states of atomic and of nuclear types for all nuclei for $D^-$ while, as expected, only nuclear states are present for $\\bar{D}^0$ in nuclei. Compared to the pure Coulomb levels, the atomic states are less bound. The nuclear ones are more bound and may present a sizable width, existing only for low angular momenta \\cite{GarciaRecio:2011xt}. This is in contrast to \\cite{Tsushima:1998ru} for $^{208}$Pb, but close to the results in \\cite{Yasui:2012rw}. \n\nThe experimental detection of $D$ and $\\bar D$-meson bound states is, though, a difficult task \\cite{Yamagata-Sekihara:2015ebw}. 
The formation of D-mesic nuclei with antiproton beams at PANDA (FAIR) might be possible if ($\\bar p$, D+N) or ($\\bar p$, D+2N) reactions with small or even zero momentum transfer can be realized, although the formation cross sections could be suppressed because of the complexity of the reaction mechanisms. Other competing mechanisms could involve the emission of pions by intermediate $D^*$ or $\\bar D^*$ with subsequent trapping of the pseudoscalar charmed mesons by the final nucleus \\cite{Yamagata-Sekihara:2015ebw}.\n\n\\section*{Acknowledgements}\nL.T. acknowledges support from the Heisenberg Programme of the Deutsche Forschungsgemeinschaft under the Project Nr. 383452331, the Ram\\'on y Cajal research programme, FPA2013-43425-P and FPA2016-81114-P Grants from MINECO, and THOR COST Action CA15213.\n\n\n\\bibliographystyle{JHEP}\n","meta":{"arxiv_id":"1711.10711","language":"en","source":"arxiv","timestamp":1512032756000,"url":"https:\/\/arxiv.org\/abs\/1711.10711","yymm":"1711"}}
{"text":"\\section{Robbins quartic equation}\n\n$$x^4+x^2-Tx+1=0.$$\n\nWe have a solution in $\\mathbb{F}(p)$ for all primes $p$. This solution is expanded as a continued fraction, and we have\n$$\\alpha=[0,a_1,a_2,a_3,\\cdots,a_n,\\cdots].$$\nBelow is the list of the first partial quotients: first the list of the first 10 full partial quotients, then a list of the first 300 partial quotients given only by their degrees.\n\n\\vskip 1 cm \n\\par $\\bullet$ : $p=2$\n\\begin{verbatim}\n\n [t, t, t^5 + t, t, t, t, t, t, t^5 + t, t]\n\ndegrees [1, 1, 5, 1, 1, 1, 1, 1, 5, 1, 9, 1, 9, 1, 1, 1, 1, 1, 1, 1,\n\n17, 1, 9, 1, 5, 1, 1, 1, 13, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 13, 1,\n\n17, 1, 1, 1, 5, 1, 13, 1, 5, 1, 1, 1, 5, 1, 5, 1, 5, 1, 1, 1, 1, 1,\n\n13, 1, 5, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 5, 1, 1, 1, 5, 1, 1,\n\n1, 9, 1, 1, 1, 1, 1, 5, 1, 9, 1, 5, 1, 1, 1, 41, 1, 1, 1, 5, 1, 9, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, \n\n1, 13, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 1, 5, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 13, 1, 5, 1, 9, 1, 1, 1, 1, 1, 29, 1, 5,\n\n1, 21, 1, 13, 1, 17, 1, 5, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 5, 1,\n\n1, 1, 5, 1, 13, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 17, 1, 1, 1, 5, 1, 1,\n\n1, 13, 1, 5, 1, 1, 1, 9, 1, 13, 1, 1, 1, 1, 1, 5, 1, 9, 1, 9, 1, 5, 1,\n\n1, 1, 5, 1, 5, 1, 5, 1, 1, 1, 5, 1, 65, 1, 9, 1, 1, 1, 1, 1, 5, 1, 5, \n\n1, 1, 1, 1, 1, 1, 1]\n\n\n\\end{verbatim}\n\\vskip 1 cm \n\\par $\\bullet$ : $p=3$\n\\begin{verbatim}\n\n\n[t, 2*t, 2*t, t, 2*t, t^3, 2*t, t, 2*t, 2*t]\n\ndegrees [1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1,\n\n1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3,\n\n3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1,\n\n1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3,\n\n3, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1,\n\n1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1,\n\n1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,\n\n1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1,\n\n1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 3,\n\n3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 27, 9, 9, 9,\n\n9, 9, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 3, 3,\n\n3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,\n\n1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3,\n\n3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1,\n\n1, 3, 3, 3, 3, 1, 1, 1, 1]\n\n\n\\end{verbatim}\n\\vskip 1 cm \n\\par $\\bullet$ : $p=5$\n\\begin{verbatim}\n\n[t, 4*t, 3*t, 3*t, 4*t^3 + 4*t, 2*t, 3*t^3 + t, 3*t, 3*t^5 + 3*t^3, 4*t^3 + 2*t]\n\ndegrees [1, 1, 1, 1, 3, 1, 3, 1, 5, 3, 3, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1,\n\n1, 3, 1, 3, 1, 1, 3, 1, 1, 1, 3, 5, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1,\n\n1, 1, 1, 3, 1, 5, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1,\n\n1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 5, 3, 1, 1, 1, 1,\n\n1, 1, 3, 3, 3, 1, 1, 3, 3, 3, 1, 1, 1, 3, 1, 1, 5, 1, 1, 1, 1, 3, 3, 3,\n\n1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1,\n\n1, 1, 3, 1, 1, 1, 3, 3, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 5,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 3, 7, 1, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 1,\n\n3, 1, 1, 1, 1, 1, 1, 5, 1, 3, 1, 1, 1, 5, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, \n\n1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 3,\n\n1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1,\n\n1, 3, 1, 1, 7, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1]\n\n\n\n\\end{verbatim}\n\\vskip 1 cm \n\\par $\\bullet$ : $p=7$\n\\begin{verbatim}\n\n[t, 6*t, 4*t, 5*t, 3*t, 2*t, t, 2*t, t, 2*t]\n\ndegrees [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1,\n\n1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 3, 1, 1, 3, 3, 3, 1, 1, 7, 1, 3, 1,\n\n3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, \n\n1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 3, 3, 1, 1, 1, 1,\n\n1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1,\n\n1, 1, 5, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1,\n\n1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1,\n\n3, 1, 1, 3, 1, 3, 1, 1, 1, 1, 3, 1, 3, 1]\n\n\\end{verbatim}\n\n\\vskip 1 cm \n\\par $\\bullet$ : $p=11$\n\\begin{verbatim}\n [t, 10*t, 6*t, 9*t, 9*t, 9*t, 3*t^3 + 5*t, 8*t, 2*t, 3*t]\n \ndegrees [1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 3, 1, 1, 3, \n\n1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,\n\n3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 3, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 3, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1]\n\n\n\\end{verbatim}\n\\vskip 1 cm \n\\par $\\bullet$ : $p=13$\n\\begin{verbatim}\n\n[t, 12*t, 7*t, 11*t, 8*t, 5*t, t^5 + 7*t^3 + 3*t, 3*t, 9*t, 4*t]\n\ndegrees [1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5,\n\n1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1,\n\n1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1,\n\n1, 1, 1, 1, 1, 1, 57, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1,\n\n1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1,\n\n1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1,\n\n1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1,\n\n1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 57, 1, 1, 1, 1, 1, 1, 1, \n\n1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, \n\n5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5,\n\n1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1,\n\n1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 57, 1, 1,\n\n1, 
1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1,\n\n1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1,\n\n1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1,\n\n1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1]\n\n\n\\end{verbatim}\n\n\n\\section{Modified Robbins quartic equation}\n\n$$x^4+x^2-Tx-1\/12=0\\quad \\text{ for} \\quad p>3.$$\n\nWe have a solution in $\\mathbb{F}(p)$ for all primes $p>3$. This solution is expanded in continued fraction and we have\n$$\\alpha=[0,a_1,a_2,a_3,\\cdots,a_n,\\cdots]$$\nIt was proved that this solution is hyperquadratic of order $1$ if $p\\equiv 1 \\mod 3$ and hyperquadratic of order $2$ if $p\\equiv 2 \\mod 3$.\n\\newline Here below we show the list of the first 300 partial quotients (only given by their degrees) for the first four values of $p$ : $5,7,11$ and $13$ (also a list of the first partial quotients and secondly of their leading coefficients).\n\\vskip 0.5 cm \n\\par $\\bullet$ : $p=5$\n\\begin{verbatim}\ncfe [3*t, 4*t, t, t, t, t, 4*t, 3*t, 2*t, 3*t]\ndegrees [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 41, 9, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 41, 9, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 9, 41, 209, 1041, 209, 41, 9, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 41, 9, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 9, 41, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \n\n1, 1, 9, 41, 209, 41, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1]\n\nleading coefficients\n[3, 4, 1, 1, 1, 1, 4, 3, 2, 3, 2, 4, 2, 1, 4, 2, 4, 4, 4, 4]\n\\end{verbatim}\n\\vskip 1 cm \n\\par $\\bullet$ : $p=7$\n\\begin{verbatim}\ncfe [2*t, 6*t, 6*t, 3*t^3 + 6*t, 5*t, 3*t, 4*t, 2*t, 4*t^3 + t, t]\n\ndegrees [1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 17,\n\n1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1,\n\n1, 1, 17, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1,\n\n3, 1, 1, 1, 1, 17, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1,\n\n1, 1, 1, 3, 1, 1, 1, 1, 115, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1,\n\n1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 17, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3,\n\n1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 17, 1, 1, 1, 1, 3, 1, 1,\n\n1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 17, 1, 1, 1, 1,\n\n3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 17, 1,\n\n1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1,\n\n1, 115, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, \n\n3, 1, 1, 1, 1, 17, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1,\n\n1, 1, 1, 3, 1, 1, 1, 1, 17, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1,\n\n1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 17, 1, 1, 1, 1, 3, 1]\n\nleading coefficients\n[2, 6, 6, 3, 5, 3, 4, 2, 4, 1, 1, 5, 3, 2, 1, 1, 5, 3, 2, 6]\n\\end{verbatim}\n\n\\noindent {\\bf{Exercise 12:}} Let $\\beta=1\/\\alpha$. 
Show that we have\n$$(2T^2+2)\\beta^8+(3T^3+5T)\\beta^7+4T\\beta+1=0.$$\nUsing this last equation and also $\\beta=[2T,6T,6T,\\beta_4]$, prove that we have\n$$\\beta^7=3(T^2-1)^2\\beta_4+4T^3+2T.$$\nFinally, using Exercise 3, give $a_4$ to $a_8$ and prove that we also have\n$$\\beta_2^7=-(T^2-1)^2\\beta_9+3T^3+5T.$$\n\\vskip 1 cm \n\\par $\\bullet$ : $p=11$\n\\begin{verbatim}\ncfe [10*t, 10*t, 5*t^3 + t, 7*t, 8*t, 10*t, 7*t, t, 4*t, 9*t]\n\ndegrees [1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1,\n\n1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,\n\n1, 1, 1, 1, 1, 1, 3, 41, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, \n\n1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1,\n\n1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,\n\n1, 1, 1, 1, 1, 1, 3, 41, 443, 41, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, \n\n1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,\n\n1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1,\n\n3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 41, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1,\n\n1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1,\n\n1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3,\n\n1, 1, 1, 1, 1, 1, 1, 1, 3, 41, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,\n\n1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,\n\n1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1]\n\nleading coefficients\n[10, 10, 5, 7, 8, 10, 7, 1, 4, 9, 5, 1, 10, 10, 8, 6, 3, 5, 5, 9]\n\n\\end{verbatim}\n\\vskip 1 cm \n\\par $\\bullet$ : $p=13$\n\\begin{verbatim}\ncfe [t, 12*t, 7*t, 11*t, 8*t, 5*t, t^5 + 7*t^3 + 3*t, 3*t, 9*t, 4*t]\n\ndegrees [1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1,\n\n1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, \n\n5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 57, 1, 1, 1,\n\n1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, \n\n1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1,\n\n1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1,\n\n1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 57, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1,\n\n1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1,\n\n1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5,\n\n1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1,\n\n1, 1, 1, 1, 57, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1,\n\n5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, \n\n1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1,\n\n1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1]\n\nleading coefficients\n[1, 12, 7, 11, 8, 5, 1, 3, 9, 4, 3, 2, 6, 6, 11, 12, 9, 3, 12, 1]\n\\end{verbatim}\n\nThere are clearly two different patterns : cases $p=5$ and $p=11$ (more generally $p\\equiv 2 \\mod 3$) on one side and cases $p=7$ and $p=13$ (more generally $p\\equiv 1 \\mod 3$) on the other side.\n\\newline In each case ($p\\equiv 1 \\mod 3$ or $p\\equiv 2 \\mod 3$) the continued fraction is based on a particular polynomial in $\\mathbb{F}_p[T]$ : $P_{u,k}=(T^2+u)^k$ where $u\\in \\mathbb{F}_p^*$ and $k\\in \\mathbb{N}$ are two parameters depending on the prime $p$. 
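\vskip 0.5 cm \noindent Before describing the two patterns in detail, note that the degree lists shown above can be recomputed directly. The short Python sketch below is ours, not part of the original notes; the function names and the truncation order $N$ are arbitrary choices. It expands the root $\alpha=[0,a_1,a_2,\cdots]$ of $x^4+x^2-Tx+c_0=0$ as a truncated power series in $u=1\/T$ over $\mathbb{F}_p$ (via the fixed point $x=u\,(c_0+x^2+x^4)$) and then runs the continued-fraction algorithm for formal Laurent series, using only exact arithmetic modulo $p$.
\begin{verbatim}
def mul(a, b, p, N):                 # truncated product of series in u
    c = [0] * N
    for i, ai in enumerate(a):
        if ai:
            for j in range(min(len(b), N - i)):
                c[i + j] = (c[i + j] + ai * b[j]) % p
    return c

def inv(c, p):                       # series inverse, needs c[0] != 0
    d = [pow(c[0], p - 2, p)] + [0] * (len(c) - 1)
    for k in range(1, len(c)):
        s = sum(c[i] * d[k - i] for i in range(1, k + 1)) % p
        d[k] = (-d[0] * s) % p
    return d

def alpha_series(p, c0, N):          # root with alpha = c0*u + O(u^2)
    x = [0] * N                      # fixed point of x = u*(c0 + x^2 + x^4)
    for _ in range(N // 2 + 2):      # each pass gains two orders in u
        x2 = mul(x, x, p, N)
        x4 = mul(x2, x2, p, N)
        x = [0] + [((c0 if i == 0 else 0) + x2[i] + x4[i]) % p
                   for i in range(N - 1)]
    return x

def cf_degrees(p, n_terms, c0=1, N=300):
    val, c = 1, alpha_series(p, c0, N)[1:]   # alpha = u*(c0 + ...)
    degs = []
    while len(degs) < n_terms and len(c) > 16:
        d = inv(c, p)                # 1/(u^val c(u)) = u^(-val) d(u)
        degs.append(val)             # quotient d[0]T^val + ... + d[val]
        rem, v = d[val + 1:], 1      # tail beyond the polynomial part
        while rem and rem[0] == 0:   # renormalise: nonzero constant term
            rem, v = rem[1:], v + 1
        val, c = v, rem
    return degs

print(cf_degrees(2, 20))                           # Robbins quartic, p = 2
print(cf_degrees(5, 20, c0=(-pow(12, 3, 5)) % 5))  # modified, c0 = -1/12 mod 5
\end{verbatim}
\noindent With $c_0=1$ the printed degrees should reproduce the beginning of the $p=2$ list for the Robbins quartic above, and with $c_0=-1\/12 \bmod 5$ (that is, $c_0=2$) the beginning of the $p=5$ list for the modified equation. We now return to the two patterns.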
In the first case, $p\\equiv 1 \\mod 3$, this means that the continued fraction $\\beta=1\/\\alpha$ is such that we have\n$$\\beta=[a_1,a_2,\\cdots,a_\\ell,\\beta_{\\ell+1}] \\quad \\text{and}\\quad \\beta^p=\\epsilon_1P_{u,k}\\beta_{\\ell+1}+\\epsilon_2R,$$\nwhere $\\ell\\geq 1$ is an integer, the pair $(\\epsilon_1,\\epsilon_2)\\in (\\mathbb{F}_p^*)^2$, all depending on $p$, and $R\\in \\mathbb{F}_p[T]$ is the remainder in the Euclidean division of $T^p$ by $P_{u,k}$. Besides, the first $\\ell$ partial quotients are given and, in this case $p\\equiv 1 \\mod 3$, they are of the form $a_i=\\lambda_iT$ with $\\lambda_i\\in \\mathbb{F}_p^*$. In both cases, all the partial quotients, up to a constant factor, belong to a particular sequence (different according to the case considered) depending only on the polynomial $P_{u,k}$.\n\\newline The case $p\\equiv 1 \\mod 3$ belongs to a much larger family which has been fully studied, while the second case belongs to another family which has only been partially described (see \\cite{AL}).\n\\section{Generalization of case $p=3$ in Robbins quartic equation}\n\nAs usual we have $r=p^t$ with $t\\geq 1$ and $p$ odd.\n\\newline We set $$\\alpha=[T,T^r,T^{r^2},\\cdots,T^{r^n},\\cdots]\\quad \\text{and}\\quad \\beta=\\alpha^{(r+1)\/2}.$$\nWe have $\\alpha=T+1\/\\alpha^r$ and therefrom $T\\alpha^r=\\alpha^{r+1}-1$. Since $\\beta^2=\\alpha^{r+1}$, by raising this formula to the power $(r+1)\/2$ we get\n$$(\\beta^2-1)^{(r+1)\/2}-T^{(r+1)\/2}\\beta^r=0.$$\nThis formula shows that $\\beta$ is an algebraic power series in the variable $T^{(r+1)\/2}$. We set $\\beta(T)=\\gamma(T^{(r+1)\/2})$, and $\\gamma$ satisfies\n$$(\\gamma^2-1)^{(r+1)\/2}-T\\gamma^r=0.$$\nNote that for $r=3$, expanding $(\\gamma^2-1)^2-T\\gamma^3$ and reducing the coefficients modulo $3$, we obtain $\\gamma^4-T\\gamma^3+\\gamma^2+1=0$. Hence, setting $x=1\/\\gamma$ and multiplying this equation by $x^4$, we recover $x^4+x^2-Tx+1=0$: that is, $1\/\\gamma$ is the solution of the Robbins quartic equation for $p=3$.\n\\newline To observe a possible analogy between the different cases $r=3$ and $r=5$, we give below the list of the first 300 partial quotients for the continued fraction of $\\gamma$.\n\n\\vskip 1 cm \n\\par $\\bullet$ : $r=3$\n\\begin{verbatim}\ncfe [t, 2*t, 2*t, t, 2*t, t^3, 2*t, t, 2*t, 2*t]\n\ndegrees [1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1,\n\n1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3,\n\n3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1,\n\n1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3,\n\n3, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1,\n\n1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1,\n\n1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,\n\n1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1,\n\n1, 1, 1, 1, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 3, \n\n3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 9, 27, 9, 9, 9,\n\n9, 9, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 9, 9, 9, 9, 3, 3, \n\n3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,\n\n1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3,\n\n3, 3, 3, 9, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1,\n\n1, 3, 3, 3, 3, 1, 1, 1, 1]\n\nleading coefficients \n[1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2]\n\n\\end{verbatim}\n\\vskip 1 cm \n\\par $\\bullet$ : $r=5$\n\\begin{verbatim}\ncfe [t, 2*t, 2*t, 2*t, 2*t, t, 2*t, 4*t^3 + 4*t, 4*t, t]\n\ndegrees [1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 5, 1, 1, 1, 3, 1, 1,\n\n1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1,\n\n1, 1, 3, 1, 1, 1, 5, 5, 5, 5, 5, 5, 1, 1, 1, 3, 1, 1, 1, 3, 1,\n\n1, 3, 1, 1, 
1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3,\n\n1, 1, 1, 5, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1,\n\n1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, \n\n1, 1, 5, 1, 1, 3, 1, 1, 5, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 3, 1,\n\n3, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 7, 1, 3, 1, 1, 1, 3, 1, 1, 1,\n\n1, 1, 1, 1, 3, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1,\n\n1, 3, 1, 1, 1, 3, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 15, 5, 5, 5, 25,\n\n5, 5, 5, 15, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1,\n\n3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1,\n\n1, 3, 1, 1, 1, 3, 1, 7, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 3, 1,\n\n1, 1, 1, 3, 1, 1, 3, 1, 1, 5, 1, 1, 3, 1, 1, 5, 1, 1, 1, 1, 1, 1,\n\n1, 3, 1, 1, 3, 1]\n\n\nleading coefficients\n[1, 2, 2, 2, 2, 1, 2, 4, 4, 1, 2, 1, 2, 1, 4, 4, 2, 1, 2, 2]\n\n\\end{verbatim}\n\n\\section{A last example of a perfect expansion in $\\mathbb{F}(5)$}\n\nHere we just give the list as previously for a particular $\\alpha$ defined by $$\\alpha=[T,T,T,\\alpha_4]\\quad \\text{ and }\\quad \\alpha^5=(T^2-1)^2\\alpha_4+3T^3+T.$$\n\n\\vskip 1 cm\n\\begin{verbatim}\ncfe [t, t, t, t, 4*t, 4*t, 4*t, 4*t, t, 2*t]\n\ndegrees [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, \n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 9, 41, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, \n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 41, 9, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 9, 41, 209, 41, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 41, 9, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 41,\n\n9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1,\n\n1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1]\n\n\nleading coefficients \n [1, 1, 1, 1, 4, 4, 4, 4, 1, 2, 3, 2, 3, 1, 1, 4, 2, 3, 2,\n\n3, 4, 1, 1, 1, 1, 4, 3, 2, 3, 2, 4, 1, 2, 4, 4, 4, 4, 3, 3, 2, 3, 2, \n\n2, 1, 1, 1, 1, 3, 1, 1, 4, 2, 4, 4, 4, 4, 3, 3, 2, 3, 2, 2, 1, 1, 1,\n\n1, 3, 4, 1, 3, 2, 3, 2, 1, 4, 4, 4, 4, 1, 2, 3, 2, 3, 1, 4, 3, 1, 1,\n\n1, 1, 2, 2, 3, 2, 3, 3, 4, 4, 4, 4, 2, 4, 1, 2, 4, 2, 3, 2, 3, 4, 1,\n\n1, 1, 1, 4, 3, 2, 3, 2, 4]\n\n\\end{verbatim}\n\nThis last example belongs to the same large family as the one presented at the end of Section 7. Up to a constant factor in $\\mathbb{F}_5^*$, the partial quotients are known to belong to a special sequence of unitary polynomials. The distribution of these polynomials in the list of partial quotients is understood and this implies that, in this particular case, the irrationality measure of $\\alpha$ is equal to $18\/7$. However the sequence of the leading coefficients, (see the last table of leading coefficients), is yet difficult to describe.\n\n\\newpage","meta":{"arxiv_id":"1711.11276","language":"en","source":"arxiv","timestamp":1512119164000,"url":"https:\/\/arxiv.org\/abs\/1711.11276","yymm":"1711"}} -{"text":"\\section{Introduction}\r\n\r\n\\label{intro}\r\n\r\nQuantum decays are a common phenomenon in particle, nuclear, and atomic\r\nphysics \\cite{ghirardi,fprev,pdg}. 
A typical starting point for the discussion\r\nof the decay of the amplitude for the survival probability of a certain\r\nunstable state $S$\n\r\n\\begin{equation}\r\na_{S}(t)=\\int_{m_{th}}^{+\\infty}\\mathrm{dm}d_{S}(m)e^{-imt}\\text{ ,}\\label{as\n\\end{equation}\r\nwhere $d_{S}(m)$ is the so-called energy (or mass) distribution ($\\mathrm{dm\nd_{S}(m)$ is the probability that the unstable state has an energy (or mass)\r\nbetween $m$ and $m+dm$ and $m_{th}$ the lowest energy of the system). Under\r\ngeneral assumptions, one can show that the survival probability $p_{S\n(t)=\\left\\vert a_{S}(t)\\right\\vert ^{2}$ is not exponential both at short\r\ntimes (where $p_{S}^{\\prime}(t\\rightarrow0)=0$) and at long times (where a\r\npower low is realized). For these deviations to occur, it is enough that a\r\nlow-energy threshold is present and that $d_{S}(m)$ is not of the Breit-Wigner\r\ntype \\cite{vw}, see also Refs.\r\n\\cite{ghirardi,fprev,khalfin,facchiprl,winter,volya,urbanolast} and refs.\r\ntherein. The short-time behavior leads to the so-called Quantum\\ Zeno Effect\r\n(QZE): multiple collapse measurements freeze the time evolution, thus\r\npreventing the decay to take place \\cite{dega,misra,koshinorev,gppra}. Note,\r\nvery often $p_{S}(t)$ is expressed as $1-t^{2}\/\\tau_{Z}^{2}+...$($\\tau_{Z}$ is\r\nthe Zeno time), but the weaker requirement of a zero derivative of $p_{S}(t)$\r\nat $t=0$ is sufficient. Experimentally, deviations from the exponential law\r\nhave been measured at short times in Ref. \\cite{raizen} (for the corresponding\r\nQZE see Ref. \\cite{raizen2}) and at long times in\\ Ref. \\cite{rothe} (for an\r\nindirect proof through data on beryllium decays, see Ref. \\cite{kelkar}).\r\n\r\nLee Hamiltonians \\cite{lee} (LH) represent a useful theoretical framework for\r\nthe study of decays, e.g. Refs.\r\n\\cite{fprev,facchiprl,vanhove,duecan,giacosapra} and refs. therein. This\r\napproach resembles very closely Quantum\\ Field Theory (QFT). Similar\r\nHamiltonians have been used in various areas of physics, which go from atomic\r\nphysics and quantum optic \\cite{vw,ford,jc} to QCD \\cite{liu}.\r\n\r\nThe issue of non-exponential decay in a pure QFT framework is still debated:\r\nwhile in\\ Ref. \\cite{maiani} a negative result was found (also in the case of\r\nsuper-renormalizable Lagrangian), in\\ Ref. \\cite{duecan,zenoqft} a different\r\nresult is obtained: it is argued that also in QFT Eq. (1) holds and short- and\r\nlong-time deviations take place.\r\n\r\nWhile the final goal is the derivation of Eq. (1), and hence of\r\nnon-exponential decay, in a genuine QFT relativistic environment, in this work\r\nwe take a more humble intent. We aim to recall in a detailed (and also\r\ndidactical) way how Eq. (1) emerges when using Lee Hamiltonians. In\r\nparticular, we shall also show that $d_{S}(m)$ is the mass distribution of the\r\ndecaying particle. Moreover, we establish a link between a discrete and\r\ncontinuous base of final states and between the basis of the unperturbed and\r\nfull Hamiltonians. This study is intended to be useful for further analyses on\r\nnon-exponential decays.\r\n\r\nThe article is organized as follows: in\\ Sec. 2 we present the Lee\r\nHamiltonian, both in the discrete and in the continuous cases. Then, in Sec. 3\r\nwe study the time evolution of an unstable state: the amplitude of the\r\nsurvival probability is expressed first as the Fourier transformation of the\r\npropagator and then of the energy distribution. Finally, in Sec. 
\n\\section{The Lee Hamiltonian approach}\n\nWe present here the Lee Hamiltonian (LH), first using an infinite discrete set of decay products and then performing the limit to the continuous case.\n\n\\subsection{Discrete LH}\n\nLet us consider the quantum state $\\left\\vert S\\right\\rangle $ as the unstable state that we aim to investigate. In particular, we study its time evolution after its preparation at $t=0.$ The state $\\left\\vert S\\right\\rangle $ interacts with an `infinity' of other states, denoted as:\n\\begin{equation}\n\\left\\vert k_{n}\\right\\rangle \\text{ with }k_{n}=\\frac{2n\\pi}{L}\\text{ and }n=0,\\pm1,\\pm2,...\\text{ ,}\n\\end{equation}\nwhere the quantity $L$ (with the dimension of energy$^{-1}$) can be thought of as the length of the linear box in which we place our system. The physical results should not depend on $L,$ if it is large enough. The quantities $k_{n}$ `look like' momenta, see below. Finally, the basis of the Hilbert space of our quantum problem reads\n\\begin{equation}\n\\text{Basis of the Hilbert space }\\mathcal{H}\\text{: }\\left\\{ \\left\\vert S\\right\\rangle ,\\left\\vert k_{0}\\right\\rangle ,\\left\\vert k_{1}\\right\\rangle ,\\left\\vert k_{-1}\\right\\rangle ,...\\right\\} \\equiv\\left\\{ \\left\\vert S\\right\\rangle ,\\left\\vert k_{n}\\right\\rangle \\right\\}\n\\end{equation}\nwith the usual orthonormality and completeness relations:\n\\begin{equation}\n\\left\\langle S|S\\right\\rangle =1\\text{ , }\\left\\langle S|k_{n}\\right\\rangle =0\\text{ , }\\left\\langle k_{n}|k_{m}\\right\\rangle =\\delta_{nm}\\text{ ; }\\left\\vert S\\right\\rangle \\left\\langle S\\right\\vert +\\sum_{n}\\left\\vert k_{n}\\right\\rangle \\left\\langle k_{n}\\right\\vert =1_{\\mathcal{H}}\\text{ .}\n\\end{equation}\n\nThe Lee Hamiltonian of the system consists of two pieces:\n\\begin{equation}\nH=H_{0}+H_{1}\n\\end{equation}\nwhere $H_{0}$ describes the free (non-interacting) part while $H_{1}$ mixes $\\left\\vert S\\right\\rangle $ with all $\\left\\vert k_{n}\\right\\rangle $:\n\\begin{equation}\nH_{0}=M_{0}\\left\\vert S\\right\\rangle \\left\\langle S\\right\\vert +\\sum_{n=0,\\pm1,...}\\omega(k_{n})\\left\\vert k_{n}\\right\\rangle \\left\\langle k_{n}\\right\\vert \\text{ ; }H_{1}=\\sum_{n=0,\\pm1,...}\\frac{gf(k_{n})}{\\sqrt{L}}\\left( \\left\\vert S\\right\\rangle \\left\\langle k_{n}\\right\\vert +\\left\\vert k_{n}\\right\\rangle \\left\\langle S\\right\\vert \\right) \\text{ .}\n\\end{equation}\n\nThe following comments are in order:\n\n\\begin{itemize}\n\\item The quantities $M_{0},$ $\\omega(k_{n}),$ $gf(k_{n})$ are real.\n\n\\item The Hamiltonian $H$ is Hermitian.\n\n\\item Dimensions: $M_{0}$ and $\\omega(k_{n})$ have dimensions [energy], while $g$ has dimensions [energy$^{+1\/2}$].\n\n\\item The energy $M_{0}$ is the bare energy of the level $\\left\\vert S\\right\\rangle $. In particle physics, it is the bare mass at rest.\n\n\\item The energy $\\omega(k_{n})$ is the bare energy of the state $\\left\\vert k_{n}\\right\\rangle ,$ see below.\n\n\\item The coupling constant $g$ measures the strength of the interaction; the form factor $f(k_{n})$ modulates the interaction. 
In practice, each mixing\r\n$\\left\\vert S\\right\\rangle \\longleftrightarrow\\left\\vert k_{n}\\right\\rangle $\r\nhas its own coupling constant $gf(k_{n}).$\r\n\r\n\\item The factor$\\sqrt{L}$ is introduced for future convenience: it is\r\nnecessary for a smooth continuous limit $L\\rightarrow\\infty$.\r\n\r\n\\item For notational simplicity, $\\sum_{n=0,\\pm1,...}$can be also expressed\r\nsimply as $\\sum_{n}$.\r\n\\end{itemize}\r\n\r\nFurther discussion concerns the interpretation and the energy $\\omega(k_{n})$.\r\n\r\n\\textbf{Interpretation:} The state $\\left\\vert S\\right\\rangle $ represents an\r\nunstable particle $S$ in its rest frame and the state $\\left\\vert\r\nk_{n}\\right\\rangle $ represents a possible final state of the decay of $S.$ In\r\nthe simplest case of a two-body decay, the state $\\left\\vert k_{n\n\\right\\rangle $ represents \\textbf{two} particles emitted by $S$ flying\r\nback-to-back\n\\begin{equation}\r\nS\\rightarrow\\varphi_{1}+\\varphi_{2}\\text{ .\n\\end{equation}\r\nIn the case of one spacial dimension, $k_{n}$ can be interpreted as the\r\nmomentum of $\\varphi_{1}$, while $-k_{n}$ is the momentum of $\\varphi_{2}$.\r\nSchematically: $\\left\\vert k_{n}\\right\\rangle \\equiv\\left\\vert \\varphi\r\n_{1}(k_{n}),\\varphi_{2}(-k_{n})\\right\\rangle $. In this way, the total\r\nmomentum of $\\left\\vert k_{n}\\right\\rangle $ is still zero, as it must. (The\r\n3D extension is straightforward). As examples of such a process, we may think\r\nof: (i) The neutral pion $\\pi^{0}$ decays into two photons: $\\pi\r\n^{0}\\rightarrow\\gamma\\gamma.$ Then, $\\pi^{0}$ in its rest frame corresponds to\r\n$\\left\\vert S\\right\\rangle ,$ while $\\gamma\\gamma$ corresponds to $\\left\\vert\r\nk_{n}\\right\\rangle $ (one photon has momentum $k_{n},$ the other $-k_{n}$).\r\n(Note, a very large number of two-body decays is listed in the PDG\r\n\\cite{pdg}). (ii) An excited atom $A^{\\ast}$ decays into the-ground state atom\r\n$A$ emitting a photon $\\gamma$: $A^{\\ast}\\rightarrow A\\gamma.$ In this case,\r\n$A^{\\ast}$ is the sate $\\left\\vert S\\right\\rangle ,$ while $\\left\\vert\r\nk_{n}\\right\\rangle $ represents the joint system of the ground-state atom $A$\r\nand the photon.\r\n\r\n\\textbf{Function }$\\omega(k_{n})$: as mentioned above, the function\r\n$\\omega(k_{n})$ represents the energy of the state $\\left\\vert k_{n\n\\right\\rangle $. In the case of a two-body decay its form is given by\r\n\\begin{equation}\r\n\\omega(k_{n})=\\sqrt{k_{n}^{2}+m_{1}^{2}}+\\sqrt{k_{n}^{2}+m_{2}^{2}}\\text{ ,\n\\end{equation}\r\nwhere $m_{1}$ is the mass of $\\varphi_{1}$ and $m_{2}$ of $\\varphi_{2}$.\r\nClearly, $\\omega(k_{n})\\geq m_{1}+m_{2}=m_{th}$, where $m_{th}$ represents the\r\nlowest energy of the $\\left\\vert k\\right\\rangle $ states. In the two-photon\r\ndecay such as the process (i) described above, one has $m_{1}=m_{2}=0,$ hence\r\n$\\omega(k_{n})=2\\left\\vert k_{n}\\right\\vert \\geq0=m_{th}$. 
In an atomic decay\r\nof the type $A^{\\ast}\\rightarrow A+\\gamma$, one has $m_{1}=0,$ and\r\n$m_{2}=M_{A},$ hence\n\\begin{equation}\r\n\\omega(k_{n})\\simeq\\left\\vert k_{n}\\right\\vert +M_{A}.\r\n\\end{equation}\r\nIn this case, one could also subtract a constant term, $H_{0}\\rightarrow\r\nH_{0}-M_{A}1_{\\mathcal{H}},$ out of which $\\omega(k_{n})\\simeq\\left\\vert\r\nk_{n}\\right\\vert $.\r\n\r\n\\subsection{Continuous LH}\r\n\r\nThe limit $L\\rightarrow\\infty$ implies that the variable $k_{n}$ becomes\r\ncontinuos:\r\n\\begin{equation}\r\nk_{n}=\\frac{2\\pi n}{L}\\rightarrow k\\subset(-\\infty,+\\infty).\r\n\\end{equation}\r\nAs usual, when $L$ is sent to infinity sums turn into integrals\n\\begin{equation}\r\n\\sum_{n}=\\frac{L}{2\\pi}\\sum_{n}\\frac{2\\pi}{L}\\rightarrow\\frac{L}{2\\pi\n\\int_{-\\infty}^{+\\infty}\\mathrm{dk}=L\\int_{-\\infty}^{+\\infty}\\frac\r\n{\\mathrm{dk}}{2\\pi}\\text{ ,\n\\end{equation}\r\nwhere $\\delta k=2\\pi\/L$ has been introduced in order to generate the\r\ndifferential $dk$. Next, we turn to the kets $\\left\\vert k\\right\\rangle $ in\r\nthe continuous limit, for which we expect that $\\left\\langle k_{1\n|k_{2}\\right\\rangle =\\delta(k_{1}-k_{2})$. To this end, let us write down the\r\nfollowing $L$-dependent discrete representation of the Dirac-delta function\n\r\n\\begin{equation}\r\n\\delta_{L}(k_{n})=\\int_{-L\/2}^{L\/2}\\frac{\\mathrm{dx}}{2\\pi}e^{ik_{n\nx}=\\left\\{\r\n\\begin{array}\r\n[c]{c\n0\\text{ for }n\\neq0\\\\\r\n\\frac{L}{2\\pi}\\text{ for }n=0\r\n\\end{array}\r\n\\right. \\text{ .\n\\end{equation}\r\nIn the limit $L\\rightarrow\\infty$ one obtains (for an arbitrary function\r\n$u(k)$):\r\n\\begin{equation}\r\nu(0)=\\sum_{n}\\delta k\\delta_{L}(k_{n})u(k_{n})\\rightarrow\\int_{-\\infty\r\n}^{+\\infty}\\mathrm{dk}\\delta(k)u(k)=u(0)\r\n\\end{equation}\r\nshowing that $\\delta(k)=\\lim_{L\\rightarrow\\infty}\\delta_{L}(k_{n})$ holds.\r\nFinally, the quite subtle link between $\\left\\vert k_{n}\\right\\rangle $ and\r\n$\\left\\vert k\\right\\rangle $ is given by\n\\begin{equation}\r\n\\left\\vert k_{n}\\right\\rangle \\overset{L\\rightarrow\\infty}{=}\\sqrt{\\frac{2\\pi\r\n}{L}}\\left\\vert k\\right\\rangle \\text{ .\n\\end{equation}\r\nNamely:\r\n\\begin{equation}\r\n\\left\\langle k_{1}|k_{2}\\right\\rangle =\\lim_{L\\rightarrow\\infty}\\frac{L}{2\\pi\r\n}\\left\\langle k_{n_{1}}|k_{n_{2}}\\right\\rangle =\\lim_{L\\rightarrow\\infty\r\n}\\left\\{\r\n\\begin{array}\r\n[c]{c\n0\\text{ for }n_{1}\\neq n_{2}\\\\\r\n\\frac{L}{2\\pi}=\\delta_{L}(0)\\text{ for }n_{1}=n_{2\n\\end{array}\r\n\\right. =\\delta(k_{1}-k_{2})\\text{ ,\n\\end{equation}\r\nas desired. (Note, in 3D we have $\\sum_{\\mathbf{k}}\\rightarrow V\\int\r\n\\frac{d^{3}k}{(2\\pi)^{3}}$ ,where $V=L^{3},$ and $\\left\\vert \\mathbf{k\n=2\\pi\\mathbf{n}\/L\\right\\rangle \\rightarrow(2\\pi)^{3\/2}\/\\sqrt{V}\\left\\vert\r\n\\mathbf{k}\\right\\rangle $). 
It is also quite peculiar that the dimension of\r\nthe ket changes when considering the limit $L\\rightarrow\\infty$\n\\begin{equation}\r\n\\dim[\\left\\vert k_{n}\\right\\rangle ]=[\\text{Energy}^{0}]\\text{ (dimensionless)\r\n, }\\dim[\\left\\vert k\\right\\rangle ]=[\\text{Energy}^{-1\/2}]\\text{ .\n\\end{equation}\r\nThen, the continuos Hilbert space is given by $\\mathcal{H}=\\left\\{ \\left\\vert\r\nS\\right\\rangle ,\\left\\vert k\\right\\rangle \\right\\} $ wit\n\\begin{equation}\r\n\\left\\langle S|S\\right\\rangle =1\\text{ , }\\left\\langle S|k\\right\\rangle\r\n=0\\text{ },\\text{ }\\left\\langle k_{1}|k_{2}\\right\\rangle =\\delta(k_{1\n-k_{2})\\text{ .\n\\end{equation}\r\nWe also check the completeness relation:\r\n\\begin{equation}\r\n1_{\\mathcal{H}}=\\left\\vert S\\right\\rangle \\left\\langle S\\right\\vert +\\sum\r\n_{n}\\left\\vert k_{n}\\right\\rangle \\left\\langle k_{n}\\right\\vert =\\left\\vert\r\nS\\right\\rangle \\left\\langle S\\right\\vert +\\sum_{n}\\delta k\\left( \\sqrt\r\n{\\frac{L}{2\\pi}}\\left\\vert k_{n}\\right\\rangle \\left\\langle k_{n}\\right\\vert\r\n\\sqrt{\\frac{L}{2\\pi}}\\right) \\overset{L\\rightarrow\\infty}{\\rightarrow\r\n}\\left\\vert S\\right\\rangle \\left\\langle S\\right\\vert +\\int_{-\\infty}^{+\\infty\r\n}dk\\left\\vert k\\right\\rangle \\left\\langle k\\right\\vert =1_{\\mathcal{H}\n\\end{equation}\r\nFinally, we are ready to present the Lee Hamiltonian $H=H_{0}+H_{1}$ in the\r\ncontinuous limit\n\r\n\\begin{equation}\r\nH_{0}=M\\left\\vert S\\right\\rangle \\left\\langle S\\right\\vert +\\int_{-\\infty\r\n}^{+\\infty}\\mathrm{dk}\\omega(k)\\left\\vert k\\right\\rangle \\left\\langle\r\nk\\right\\vert \\text{ , }H_{1}=\\int_{-\\infty}^{+\\infty}\\mathrm{dk}\\frac\r\n{gf(k)}{\\sqrt{2\\pi}}\\left( \\left\\vert S\\right\\rangle \\left\\langle\r\nk\\right\\vert +\\left\\vert k\\right\\rangle \\left\\langle S\\right\\vert \\right)\r\n\\text{ .}\\nonumber\r\n\\end{equation}\r\nOne can verify that the dimensions is preserved. For instance:\r\n\\begin{equation}\r\n\\dim\\left[ dk\\omega(k)\\left\\vert k\\right\\rangle \\left\\langle k\\right\\vert\r\n\\right] =\\dim[dk]\\dim[\\omega(k)]\\dim^{2}[\\left\\vert k\\right\\rangle\r\n]=[\\text{Energy}][\\text{Energy}][\\text{Energy}^{-1}]=[\\text{Energy}]\\text{ .\n\\end{equation}\r\n\r\n\r\n\\section{Determination of the survival probability}\r\n\r\n\\subsection{Time evolution operator}\r\n\r\nThe Schr\\\"{o}dinger equation (in natural units\n\\begin{equation}\r\ni\\frac{\\partial\\left\\vert \\psi(t)\\right\\rangle }{\\partial t}=H\\left\\vert\r\n\\psi(t)\\right\\rangle\r\n\\end{equation}\r\ncan be univocally solved for a certain given initial state\r\n\\begin{equation}\r\n\\left\\vert \\psi(0)\\right\\rangle =c_{S}\\left\\vert S\\right\\rangle +\\sum_{n\nc_{n}\\left\\vert k_{n}\\right\\rangle \\overset{L\\rightarrow\\infty}{\\equiv\nc_{S}\\left\\vert S\\right\\rangle +\\int_{-\\infty}^{+\\infty}\\mathrm{dk\nc(k)\\left\\vert k\\right\\rangle\r\n\\end{equation}\r\nwith $c(k)\\overset{L\\rightarrow\\infty}{\\equiv}\\sqrt{\\frac{L}{2\\pi}}c_{n}$ .\r\nThe normalization $\\left\\langle \\psi(t)|\\psi(t)\\right\\rangle =1$ implie\n\\begin{equation}\r\n1=\\left\\vert c_{S}\\right\\vert ^{2}+\\sum_{n}\\left\\vert c_{n}\\right\\vert\r\n^{2}\\overset{L\\rightarrow\\infty}{\\equiv}\\left\\vert c_{S}\\right\\vert ^{2\n+\\int_{-\\infty}^{+\\infty}\\mathrm{dk}\\left\\vert c(k)\\right\\vert ^{2}\\text{ .\n\\end{equation}\r\nIn particular, one is typically interested to the case $c_{S}=1$ (but not\r\nonly). 
A formal solution to the time evolution is obtained by introducing the\r\ntime-evolution operator\n\r\n\\begin{equation}\r\nU(t)=e^{-iHt}\\rightarrow\\left\\vert \\psi(t)\\right\\rangle =U(t)\\left\\vert\r\n\\psi(0)\\right\\rangle \\text{ . \n\\end{equation}\r\nThe time-evolution operator $U(t)$ can be expressed in terms of a Fourier\r\ntransform (for $t>0$)\n\\begin{equation}\r\nU(t)=\\frac{i}{2\\pi}\\int_{-\\infty}^{+\\infty}\\mathrm{dE}\\frac{1\n{E-H+i\\varepsilon}e^{-iEt}\\text{ }=\\frac{i}{2\\pi}\\int_{-\\infty}^{+\\infty\r\n}\\mathrm{dE}G(E)e^{-iEt}\\text{ with }G(E)=\\frac{1}{E-H+i\\varepsilon}\\text{ ,}\r\n\\label{uoft\n\\end{equation}\r\nwhere $\\varepsilon$ is an infinitesimal quantity and $G(E)$ is the `propagator\r\noperator', which can be expanded as\n\r\n\\begin{equation}\r\nG(E)=\\frac{1}{E-H+i\\varepsilon}=\\sum_{n=0}^{\\infty}\\left( \\frac{1\n{E-H_{0}+i\\varepsilon}H_{1}\\right) ^{n}\\frac{1}{E-H_{0}+i\\varepsilon}\\text{\r\n,\n\\end{equation}\r\nwhere we have used that $(AB)^{-1}=B^{-1}A^{-1}$ ($A,B$ arbitrary operators on\r\nthe Hilbert space $\\mathcal{H}$).\r\n\r\n\\subsection{Propagator,\\ Feynman rules, and survival probability}\r\n\r\nWe are interested in the evaluation of the survival (or non-decay) probability\r\namplitude $a_{S}(t)=\\left\\langle S\\right\\vert U(t)\\left\\vert S\\right\\rangle $,\r\nout of which the survival probability of the state $S$ reads $p_{S\n(t)=\\left\\vert a_{S}(t)\\right\\vert ^{2}.$ In the \\textit{trivial limit,} in\r\nwhich $H=H_{0}$ ( $g\\rightarrow0$), one has\r\n\\begin{equation}\r\na_{S}(t)=\\left\\langle S\\right\\vert U(t)\\left\\vert S\\right\\rangle =\\left\\langle\r\nS\\right\\vert e^{-iH_{0}t}\\left\\vert S\\right\\rangle =e^{-iM_{0}t}\\rightarrow\r\np_{S}(t)=1\\text{ .\n\\end{equation}\r\nAlternatively, one may use Eq. 
(\\ref{uoft}):\r\n\\begin{equation}\r\na_{S}(t)=\\left\\langle S\\right\\vert U(t)\\left\\vert S\\right\\rangle =\\frac\r\n{i}{2\\pi}\\int_{-\\infty}^{+\\infty}\\mathrm{dE}\\frac{1}{E-M_{0}+i\\varepsilon\r\n}e^{-iEt}\\text{ }=e^{-iM_{0}t}\\text{ ,\n\\end{equation}\r\nwhere we have closed downwards and picked up the pole for $E=M_{0\n-i\\varepsilon$ (one is obliged to close downwards to guarantee convergence).\r\nIn passing by, we note that the object\r\n\\begin{equation}\r\nG_{S}^{\\text{free}}(E)=G_{S}^{(0)}(E)=\\left\\langle S\\right\\vert \\frac\r\n{1}{E-H_{0}+i\\varepsilon}\\left\\vert S\\right\\rangle =\\frac{1}{E-M_{0\n+i\\varepsilon\n\\end{equation}\r\nis the free propagator of the state $S.$\r\n\r\nIn the \\textit{interacting case }the evaluation of\\textit{ }$a(t)$\\textit{\r\n}proceeds as follow:\r\n\\begin{equation}\r\na_{S}(t)=\\frac{i}{2\\pi}\\int_{-\\infty}^{+\\infty}\\mathrm{dE}G_{S}(E)e^{-iEt\n\\text{ , where }G_{S}(E)=\\left\\langle S\\right\\vert G(E)\\left\\vert\r\nS\\right\\rangle =\\left\\langle S\\right\\vert \\frac{1}{E-H+i\\varepsilon}\\left\\vert\r\nS\\right\\rangle \\label{as1\n\\end{equation}\r\nis the full propagator of $S.$ It is now necessary to evaluate $G_{S}(E)$\r\nexplicitly though a lengthy but straightforward calculation \\cite{duecan}:\r\n\\begin{equation}\r\nG_{S}(E)=\\left\\langle S\\right\\vert G(E)\\left\\vert S\\right\\rangle =\\sum\r\n_{n=0}^{\\infty}\\left\\langle S\\right\\vert \\left( \\frac{1}{E-H_{0\n+i\\varepsilon}H_{1}\\right) ^{n}\\frac{1}{E-H_{0}+i\\varepsilon}\\left\\vert\r\nS\\right\\rangle =\\sum_{n=0}^{\\infty}G_{S}^{(n)}(E)\r\n\\end{equation}\r\nwit\n\\begin{equation}\r\nG_{S}^{(n)}(E)=\\left\\langle S\\right\\vert \\left( \\frac{1}{E-H_{0\n+i\\varepsilon}H_{1}\\right) ^{n}\\left\\vert S\\right\\rangle \\frac{1\n{E-M_{0}+i\\varepsilon}\\text{ .\n\\end{equation}\r\nLet us evaluate the first three terms\n\\begin{equation}\r\nn=0\\rightarrow G_{S}^{(0)}(E)=\\left\\langle S\\right\\vert 1\\left\\vert\r\nS\\right\\rangle \\frac{1}{E-M_{0}+i\\varepsilon}=\\frac{1}{E-M_{0}+i\\varepsilon\r\n}\\text{ ,\n\\end{equation\n\\begin{equation}\r\nn=1\\rightarrow\\left\\langle S\\right\\vert \\frac{1}{E-H_{0}+i\\varepsilon\nH_{1}\\left\\vert S\\right\\rangle \\frac{1}{E-M_{0}+i\\varepsilon}=0\\text{ ,\n\\end{equation}\r\n\r\n\\begin{align}\r\nn & =2\\rightarrow G_{S}^{(1)}(E)=\\left\\langle S\\right\\vert \\left( \\frac\r\n{1}{E-H_{0}+i\\varepsilon}H_{1}\\right) ^{2}\\left\\vert S\\right\\rangle \\frac\r\n{1}{E-M_{0}+i\\varepsilon}\\\\\r\n& =\\frac{1}{E-M_{0}+i\\varepsilon}\\left\\langle S\\right\\vert H_{1}\\frac\r\n{1}{E-H_{0}+i\\varepsilon}H_{1}\\left\\vert S\\right\\rangle \\frac{1\n{E-M_{0}+i\\varepsilon}=-\\frac{\\Pi(E)}{\\left( E-M_{0}+i\\varepsilon\\right)\r\n^{3}}\\text{ .\n\\end{align}\r\nThe recursive quantity $\\Pi(E)$ reads\n\\begin{equation}\r\n\\Pi(E)=-\\left\\langle S\\right\\vert H_{1}\\frac{1}{E-H_{0}+i\\varepsilon\nH_{1}\\left\\vert S\\right\\rangle \\text{ .\n\\end{equation}\r\nWe introduce $1_{\\mathcal{H}}=\\left\\vert S\\right\\rangle \\left\\langle\r\nS\\right\\vert +\\int_{-\\infty}^{+\\infty}dk\\left\\vert k\\right\\rangle \\left\\langle\r\nk\\right\\vert $ two times, obtaining:\r\n\\begin{align}\r\n\\Pi(E) & =-\\left\\langle S\\right\\vert H_{1}1_{\\mathcal{H}}\\frac{1\n{E-H_{0}+i\\varepsilon}1_{\\mathcal{H}}H_{1}\\left\\vert S\\right\\rangle\r\n=-\\int_{-\\infty}^{+\\infty}\\mathrm{dk}\\int_{-\\infty}^{+\\infty}\\mathrm{dq\n\\left\\langle S\\right\\vert H_{1}\\left\\vert k\\right\\rangle \\left\\langle\r\nk\\right\\vert 
\\frac{1}{E-H_{0}+i\\varepsilon}\\left\\vert q\\right\\rangle\r\n\\left\\langle q\\right\\vert H_{1}\\left\\vert S\\right\\rangle \\nonumber\\\\\r\n& =-\\int_{-\\infty}^{+\\infty}\\mathrm{dk}\\int_{-\\infty}^{+\\infty\n\\mathrm{dq}\\frac{gf(k)}{\\sqrt{2\\pi}}\\frac{\\delta(k-q)}{E-\\omega\r\n(k)+i\\varepsilon}\\frac{gf(q)}{\\sqrt{2\\pi}}=-\\int_{-\\infty}^{+\\infty\n\\frac{\\mathrm{dk}}{2\\pi}\\frac{g^{2}f(k)^{2}}{E-\\omega(k)+i\\varepsilon}\\text{\r\n,}\\nonumber\r\n\\end{align}\r\nwhere $\\left\\langle S\\right\\vert H_{1}\\left\\vert k\\right\\rangle =gf(k)\/\\sqrt\r\n{2\\pi}$ was used. Going further, for $n=0,1,2,...$ we get $G_{S\n^{(2n+1)}(E)=0$ and\r\n\\begin{equation}\r\nG_{S}^{(2n)}(E)=\\frac{\\left[ -\\Pi(E)\\right] ^{n}}{\\left( E-M_{0\n+i\\varepsilon\\right) ^{n+1}}\\text{ .\n\\end{equation}\r\nFinally\n\\begin{align}\r\nG_{S}(E) & =\\sum_{n=0}^{\\infty}G_{S}^{(2n)}(E)=\\sum_{n=0}^{\\infty\n\\frac{\\left[ -\\Pi(E)\\right] ^{n}}{\\left( E-M_{0}+i\\varepsilon\\right)\r\n^{2n+1}}=\\frac{1}{\\left( E-M_{0}+i\\varepsilon\\right) }\\sum_{n=0}^{\\infty\r\n}\\frac{\\left[ -\\Pi(E)\\right] ^{n}}{\\left( E-M_{0}+i\\varepsilon\\right)\r\n^{n}}\\\\\r\n& =\\frac{1}{\\left( E-M_{0}+i\\varepsilon\\right) }\\frac{1}{1+\\frac{\\Pi\r\n(E)}{E-M_{0}+i\\varepsilon}}=\\frac{1}{E-M_{0}+\\Pi(E)+i\\varepsilon}\\text{ .\n\\end{align}\r\nAt this point, we can identify `Feynman rules' reminiscent of QFT\n\\begin{align}\r\n\\text{bare }S\\text{ propagator } & \\rightarrow\\frac{1}{E-M_{0}+i\\varepsilon\r\n}\\\\\r\n\\text{bare }k\\text{ propagator (}k\\text{ fixed)} & \\rightarrow\\frac\r\n{1}{E-\\omega(k)+i\\varepsilon}\\\\\r\nkS\\text{ vertex} & \\rightarrow gf(k)\\\\\r\n\\text{internal }k\\text{ line(}k\\text{ not fixed)} & \\rightarrow-\\Pi\r\n(E)=\\int_{-\\infty}^{+\\infty}\\frac{dk}{2\\pi}\\frac{g^{2}f(k)^{2}}{E-\\omega\r\n(k)+i\\varepsilon\n\\end{align}\r\nNote, the latter can be understood by applying $gf(k)$ at each vertex and the\r\n$k$-propagator in the middle, and by an overall integration $\\int_{-\\infty\r\n}^{+\\infty}\\frac{dk}{2\\pi}$ due to the fact that $k$ is not fixed.\r\n\r\nThe full propagator of $S$ determined above\n\\begin{equation}\r\n\\text{full }S\\text{ propagator }\\rightarrow\\frac{1}{E-M_{0}+\\Pi\r\n(E)+i\\varepsilon}\\text{ ,\n\\end{equation}\r\n\r\n\r\ncan be also obtained in a very elegant way by using the Bethe-Salpeter\r\nequation obtained by using the Feynman rules listed above:\r\n\\begin{equation}\r\nG_{S}(E)=\\frac{1}{E-M_{0}+i\\varepsilon}-\\frac{1}{E-M_{0}+i\\varepsilon\n\\Pi(E)G_{S}(E)\\text{ .\n\\end{equation}\r\n\r\n\r\nFinally, the survival amplitude (\\ref{as1}) can be expressed as\n\\begin{equation}\r\na_{S}(t)=\\frac{i}{2\\pi}\\int_{-\\infty}^{+\\infty}\\mathrm{dE}G_{S}(E)e^{-iEt\n=\\frac{i}{2\\pi}\\int_{-\\infty}^{+\\infty}\\mathrm{dE}\\frac{1}{E-M_{0\n+\\Pi(E)+i\\varepsilon}e^{-iEt}\\text{ .} \\label{as2\n\\end{equation}\r\n\r\n\r\n\\subsection{Spectral function and survival probability}\r\n\r\nLet us denote the basis of eigenstates of the Hamiltonian $H$ as $\\left\\vert\r\nm\\right\\rangle $ wit\n\\begin{equation}\r\nH\\left\\vert m\\right\\rangle =m\\left\\vert m\\right\\rangle \\text{ for }m\\geq\r\nm_{th}\\text{ (}m_{th}\\text{ is the low-energy threshold) .\n\\end{equation}\r\nThe existence of a minimal energy $m_{th}$ is a general physical and\r\nmathematical property. 
The states $\\left\\vert m\\right\\rangle $ form an\r\northonormal basis of the Hilbert space $\\mathcal{H=}\\{\\left\\vert\r\nm\\right\\rangle $ with $m\\geq m_{th}\\}$, whose elements fulfill standard\r\nrelations:\r\n\\begin{equation}\r\n1_{\\mathcal{H}}=\\int_{m_{th}}^{+\\infty}\\mathrm{dm}\\left\\vert m\\right\\rangle\r\n\\left\\langle m\\right\\vert \\text{ ; }\\left\\langle m_{1}|m_{2}\\right\\rangle\r\n=\\delta(m_{1}-m_{2})\\text{ .\n\\end{equation}\r\nThe link between the `old' basis $\\{\\left\\vert S\\right\\rangle ,\\left\\vert\r\nk\\right\\rangle \\}$ (eigenstates of $H_{0}$) and the `new' basis $\\{\\left\\vert\r\nm\\right\\rangle \\}$ (eigenstates of $H$) is not trivial. The state $\\left\\vert\r\nS\\right\\rangle $ can be expressed in terms of the basis $\\{\\left\\vert\r\nm\\right\\rangle $ $\\}$ as\r\n\\begin{equation}\r\n\\left\\vert S\\right\\rangle =\\int_{m_{th}}^{\\infty}\\mathrm{dm}\\alpha\r\n_{S}(m)\\left\\vert m\\right\\rangle \\text{ with }\\alpha_{S}(m)=\\left\\langle\r\nS|m\\right\\rangle \\text{ .\n\\end{equation}\r\nThe quantity\r\n\\begin{equation}\r\nd_{S}(m)=\\left\\vert \\alpha_{S}(m)\\right\\vert ^{2}=\\left\\vert \\left\\langle\r\nS|m\\right\\rangle \\right\\vert ^{2\n\\end{equation}\r\nis called the \\textbf{spectral function (or energy\/mass distribution) }of the\r\nstate $S$. The normalization of the state $\\left\\vert S\\right\\rangle $ implies\r\nthe normalization of the mass distribution $d_{S}(m)$:\r\n\\begin{equation}\r\n1=\\left\\langle S|S\\right\\rangle =\\int_{m_{th}}^{\\infty}d_{S}(m)\\mathrm{dm\n\\text{ .} \\label{norm\n\\end{equation}\r\nThe simple intuitive interpretation is that $d_{S}(m)\\mathrm{dm}$ represents\r\nthe probability that the state $S$ has a energy (or mass) between $m$ and\r\n$m+dm.$ As a consequence, the time-evolution can be easily evaluated by\r\ninserting $1=\\int_{m_{th}}^{+\\infty}\\mathrm{dm}\\left\\vert m\\right\\rangle\r\n\\left\\langle m\\right\\vert $ two times\n\\begin{equation}\r\na_{S}(t)=\\left\\langle S\\right\\vert U(t)\\left\\vert S\\right\\rangle =\\left\\langle\r\nS\\right\\vert e^{-iHt}\\left\\vert S\\right\\rangle =\\int_{m_{th}}^{\\infty\r\n}\\mathrm{dm}d_{S}(m)e^{-imt}\\text{ .\n\\end{equation}\r\nThis is all formally correct, but it does not help us further as long as we do\r\nnot have a way to calculate $d_{S}(m)$. This is possible by using the\r\npropagator of $S$ studied in Sec. 3.2. In fact, the propagator can be\r\nre-expressed as (again, inserting $1=\\int_{m_{th}}^{+\\infty}dm\\left\\vert\r\nm\\right\\rangle \\left\\langle m\\right\\vert $ two times)\n\\begin{equation}\r\nG_{S}(E)=\\frac{1}{E-M_{0}+\\Pi(E)+i\\varepsilon}=\\left\\langle S\\right\\vert\r\n1\\frac{1}{E-H+i\\varepsilon}1\\left\\vert S\\right\\rangle =\\int_{m_{th}}^{+\\infty\r\n}\\mathrm{dm}\\frac{d_{S}(m)}{E-m+i\\varepsilon}\\text{ .} \\label{kl\n\\end{equation}\r\nIts physical meaning can be understood by noticing that the dressed propagator\r\n$G_{S}(E)$ has been rewritten as the `sum' of free propagators, whose weight\r\nfunction is $d_{S}(m).$ As a next step, we need to invert Eq. (\\ref{kl}). Let\r\nus first consider the case $g=0.$ In this limit, it is evident from Eq.\r\n(\\ref{kl}) that:\r\n\\begin{equation}\r\nd_{S}(E)=\\delta(E-M_{0})\\text{ .\n\\end{equation}\r\nThis is expected because in this case the state $\\left\\vert S\\right\\rangle $\r\nis an eigenstate of the Hamiltonian, hence the mass distribution is a\r\ndelta-function peaked at $M_{0}$. 
When the interaction is switched on, we\r\nevaluate the imaginary part of Eq. (\\ref{kl}):\r\n\\begin{equation}\r\n\\operatorname{Im}G_{S}(E)=\\int_{m_{th}}^{+\\infty}\\mathrm{dm}\\frac{-\\varepsilon\r\nd_{S}(m)}{(E-m)^{2}+\\varepsilon^{2}}=-\\int_{m_{th}}^{+\\infty}\\mathrm{dm\nd_{S}(m)\\pi\\delta(E-m)=-\\pi d_{S}(E)\\text{ }.\r\n\\end{equation}\r\nHence $d_{S}(E)$ is calculated as\n\\begin{equation}\r\nd_{S}(E)=-\\frac{\\operatorname{Im}G_{S}(E)}{\\pi}=\\frac{1}{\\pi}\\frac\r\n{\\operatorname{Im}\\Pi(E)}{(E-M_{0}+\\operatorname{Re}\\Pi(E))^{2}+\\left(\r\n\\operatorname{Im}\\Pi(E)\\right) ^{2}}\\text{ .} \\label{ds\n\\end{equation}\r\nThe normalization of $d_{S}(E),$ Eq. (\\ref{norm}), can be also proven by using\r\nEq. (\\ref{ds}), see details in\\ Ref. \\cite{gpprd}.\r\n\r\nIn the end, once the spectral function $d_{S}(m)$ is known, the survival\r\namplitude can be re-expressed as its Fourier transform by using Eqs.\r\n(\\ref{as2}) and (\\ref{kl})\n\\begin{align}\r\na_{S}(t) & =\\frac{i}{2\\pi}\\int_{-\\infty}^{+\\infty}\\mathrm{dE}G_{S\n(E)e^{-iEt}=\\frac{i}{2\\pi}\\int_{-\\infty}^{+\\infty}\\mathrm{d}E\\int_{m_{th\n}^{+\\infty}\\mathrm{dm}\\frac{d_{S}(m)}{E-m+i\\varepsilon}e^{-imt}\\nonumber\\\\\r\n& =\\int_{m_{th}}^{+\\infty}\\mathrm{dm}d_{S}(m)e^{-imt}=\\int_{m_{th}}^{+\\infty\r\n}\\mathrm{dm}d_{S}(m)e^{-imt}\\text{ .\n\\end{align}\r\nThe latter expression coincides with Eq. (1), whose detailed determination was\r\nour goal. From here on, all the usual strategy can be applied\r\n\\cite{ghirardi,fprev,koshinorev,duecan}. In particular, the (unphysical)\r\nBreit-Wigner limit is obtained for $\\omega(k)=k$ (unlimited from below) and\r\n$f(k)=1,$ out of which $d_{S}(m)=\\frac{\\Gamma}{2\\pi}\\left( \\left(\r\nm-M_{0}\\right) ^{2}+\\Gamma^{2}\/4\\right) ^{-1}$ with $\\Gamma=g^{2}.$ In this\r\ncase, $a_{S}(t)=e^{-i(M_{0}-i\\Gamma\/2)t}$ and $p_{S}(t)=e^{-\\Gamma t}$ (see\r\ndetails in\\ Refs. \\cite{giacosapra,actalast}).\r\n\r\n\\section{Conclusions}\r\n\r\nWe have proven that Eq. (1) holds in the QFT-like approach of effective Lee\r\nHamiltonians by showing all the main steps leading to it. However, a Lee\r\nHamiltonian is not fully equivalent to QFT, since some features are still\r\nmissing. In fact, the Lee approach does not contain transitions from the\r\nvacuum state to some particles (in genuine QFT, terms of the type $\\left\\vert\r\n0\\right\\rangle \\left\\langle S\\varphi_{1}\\varphi_{2}\\right\\vert $+hc are also\r\npart of the interacting Hamiltonian and affect the results for finite time\r\nintervals). Moreover, quadratic expressions are not present in the\r\npropagator(s) of the Lee Hamiltonian but naturally appear in QFT.\r\n\r\nHence, the main question for the future reads: is Eq. (1) as it stands valid\r\nalso in\\ QFT? If, as argued in\\ Refs. \\cite{duecan,zenoqft}, this is true,\r\nnon-exponential decay is realized in QFT both at short and long times.\r\nMoreover, in the interesting case of a super-renormalizable Lagrangian, the\r\nshort-time behavior is independent on the cutoff (this is so because the\r\nenergy distribution $d_{S}(m)$ scales as $m^{-3}$ for large $m$). Note, this\r\nis different from the result of Ref. \\cite{maiani} obtained by using\r\nperturbation theory at second order (that is, without resumming the propagator).\r\n\r\nA further interesting topic for the future is the study of the decay of a\r\nparticle with a nonzero momentum\r\n\\cite{khalfin2,stefanovich,shirokov,urbanowski,giunti,rel}. 
Contrary to naive\r\nexpectations, the usual relativistic time dilatation formula does not hold\r\n(even in the exponential limit, a different analytical result is obtained, see\r\ndetails in Ref. \\cite{rel}). The full understanding of the study of decay in\r\nQFT can also help to shed light on decays of moving particles.\r\n\r\n\\bigskip\r\n\r\n\\textbf{Acknowledgments: }F. G. thanks S. Mr\\'{o}wczy\\'{n}ski and G. Pagliara\r\nfor very useful discussions. F. G. acknowledges support from the Polish\r\nNational Science Centre (NCN) through the OPUS project no. 2015\/17\/B\/ST2\/01625.\r\n\r\n","meta":{"arxiv_id":"1711.10727","language":"en","source":"arxiv","timestamp":1512032790000,"url":"https:\/\/arxiv.org\/abs\/1711.10727","yymm":"1711"}} +{"text":"\\section{Introduction}\r\n\r\n\\label{intro}\r\n\r\nQuantum decays are a common phenomenon in particle, nuclear, and atomic\r\nphysics \\cite{ghirardi,fprev,pdg}. A typical starting point for the discussion\r\nof the decay of the amplitude for the survival probability of a certain\r\nunstable state $S$\n\r\n\\begin{equation}\r\na_{S}(t)=\\int_{m_{th}}^{+\\infty}\\mathrm{dm}d_{S}(m)e^{-imt}\\text{ ,}\\label{as\n\\end{equation}\r\nwhere $d_{S}(m)$ is the so-called energy (or mass) distribution ($\\mathrm{dm\nd_{S}(m)$ is the probability that the unstable state has an energy (or mass)\r\nbetween $m$ and $m+dm$ and $m_{th}$ the lowest energy of the system). Under\r\ngeneral assumptions, one can show that the survival probability $p_{S\n(t)=\\left\\vert a_{S}(t)\\right\\vert ^{2}$ is not exponential both at short\r\ntimes (where $p_{S}^{\\prime}(t\\rightarrow0)=0$) and at long times (where a\r\npower low is realized). For these deviations to occur, it is enough that a\r\nlow-energy threshold is present and that $d_{S}(m)$ is not of the Breit-Wigner\r\ntype \\cite{vw}, see also Refs.\r\n\\cite{ghirardi,fprev,khalfin,facchiprl,winter,volya,urbanolast} and refs.\r\ntherein. The short-time behavior leads to the so-called Quantum\\ Zeno Effect\r\n(QZE): multiple collapse measurements freeze the time evolution, thus\r\npreventing the decay to take place \\cite{dega,misra,koshinorev,gppra}. Note,\r\nvery often $p_{S}(t)$ is expressed as $1-t^{2}\/\\tau_{Z}^{2}+...$($\\tau_{Z}$ is\r\nthe Zeno time), but the weaker requirement of a zero derivative of $p_{S}(t)$\r\nat $t=0$ is sufficient. Experimentally, deviations from the exponential law\r\nhave been measured at short times in Ref. \\cite{raizen} (for the corresponding\r\nQZE see Ref. \\cite{raizen2}) and at long times in\\ Ref. \\cite{rothe} (for an\r\nindirect proof through data on beryllium decays, see Ref. \\cite{kelkar}).\r\n\r\nLee Hamiltonians \\cite{lee} (LH) represent a useful theoretical framework for\r\nthe study of decays, e.g. Refs.\r\n\\cite{fprev,facchiprl,vanhove,duecan,giacosapra} and refs. therein. This\r\napproach resembles very closely Quantum\\ Field Theory (QFT). Similar\r\nHamiltonians have been used in various areas of physics, which go from atomic\r\nphysics and quantum optic \\cite{vw,ford,jc} to QCD \\cite{liu}.\r\n\r\nThe issue of non-exponential decay in a pure QFT framework is still debated:\r\nwhile in\\ Ref. \\cite{maiani} a negative result was found (also in the case of\r\nsuper-renormalizable Lagrangian), in\\ Ref. \\cite{duecan,zenoqft} a different\r\nresult is obtained: it is argued that also in QFT Eq. (1) holds and short- and\r\nlong-time deviations take place.\r\n\r\nWhile the final goal is the derivation of Eq. 
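The dataset record above ends by claiming that the (unphysical) Breit-Wigner limit reproduces the pure exponential law. As a side illustration (it is not part of the dataset record), that claim follows from a one-line contour integration over the symbols defined in the record:

```latex
\begin{equation}
a_{S}(t)=\int_{-\infty}^{+\infty}\mathrm{dm}\,
\frac{\Gamma}{2\pi}\,
\frac{e^{-imt}}{(m-M_{0})^{2}+\Gamma^{2}/4}
\overset{t>0}{=}e^{-iM_{0}t-\Gamma t/2}
\;\Rightarrow\;
p_{S}(t)=\left\vert a_{S}(t)\right\vert^{2}=e^{-\Gamma t}.
\end{equation}
% For t > 0 the contour is closed in the lower half-plane and picks up
% the simple pole at m = M_0 - i*Gamma/2, exactly as in the free-case
% integral evaluated earlier in the record.
```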
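A quick numerical cross-check of the same Breit-Wigner limit, again added purely for illustration and assuming only `numpy` (truncating the long Lorentzian tails makes the numbers slightly approximate):

```python
import numpy as np

# Survival amplitude a_S(t) = \int dm d_S(m) e^{-imt} for a pure
# Breit-Wigner distribution (no threshold). Expectation: p_S(t) = e^{-Gamma*t}.
M0, Gamma = 1.0, 0.1
m = np.linspace(M0 - 200 * Gamma, M0 + 200 * Gamma, 400_001)
dm = m[1] - m[0]
d_S = (Gamma / (2 * np.pi)) / ((m - M0) ** 2 + Gamma ** 2 / 4)

for t in (0.0, 5.0, 10.0):
    # Riemann-sum approximation of the Fourier integral over m.
    a_S = np.sum(d_S * np.exp(-1j * m * t)) * dm
    print(f't={t:4.1f}  |a_S|^2={abs(a_S) ** 2:.4f}  '
          f'e^(-Gamma*t)={np.exp(-Gamma * t):.4f}')
```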
\section{Conclusions}

We have proven that Eq. (1) holds in the QFT-like approach of effective Lee Hamiltonians by showing all the main steps leading to it. However, a Lee Hamiltonian is not fully equivalent to QFT, since some features are still missing. In fact, the Lee approach does not contain transitions from the vacuum state to some particles (in genuine QFT, terms of the type $\left\vert 0\right\rangle \left\langle S\varphi_{1}\varphi_{2}\right\vert$ + h.c. are also part of the interacting Hamiltonian and affect the results for finite time intervals). Moreover, quadratic expressions are not present in the propagator(s) of the Lee Hamiltonian but naturally appear in QFT.

Hence, the main question for the future reads: is Eq. (1) as it stands valid also in QFT? If, as argued in Refs. \cite{duecan,zenoqft}, this is true, non-exponential decay is realized in QFT both at short and long times. Moreover, in the interesting case of a super-renormalizable Lagrangian, the short-time behavior is independent of the cutoff (this is so because the energy distribution $d_{S}(m)$ scales as $m^{-3}$ for large $m$). Note, this is different from the result of Ref. \cite{maiani} obtained by using perturbation theory at second order (that is, without resumming the propagator).

A further interesting topic for the future is the study of the decay of a particle with a nonzero momentum \cite{khalfin2,stefanovich,shirokov,urbanowski,giunti,rel}. Contrary to naive expectations, the usual relativistic time dilatation formula does not hold (even in the exponential limit, a different analytical result is obtained, see details in Ref. \cite{rel}). The full understanding of the study of decay in QFT can also help to shed light on decays of moving particles.

\textbf{Acknowledgments:} F. G. thanks S. Mr\'{o}wczy\'{n}ski and G. Pagliara for very useful discussions. F. G. acknowledges support from the Polish National Science Centre (NCN) through the OPUS project no. 2015/17/B/ST2/01625.

{"meta":{"arxiv_id":"1711.10727","language":"en","source":"arxiv","timestamp":1512032790000,"url":"https://arxiv.org/abs/1711.10727","yymm":"1711"}}
diff --git a/docs/Sandbox-ZH.md b/docs/Sandbox-ZH.md
index ce73c3265..39322eed5 100644
--- a/docs/Sandbox-ZH.md
+++ b/docs/Sandbox-ZH.md
@@ -8,52 +8,74 @@
 在使用沙盒实验室前,你可能需要使用如下命令安装沙盒相关的第三方依赖:
 ```shell
 pip install -v -e .[sandbox]
-
-pip install detectron2@git+https://github.com/facebookresearch/detectron2.git@b7c7f4ba82192ff06f2bbb162b9f67b00ea55867
 ```
 
 **注意**:一些沙盒的依赖还需要额外的领域依赖。例如,如果用户想要在沙盒中训练一个 ModelScope 平台的 NLP 模型,那可能需要为 `modelscope` 库安装额外的 `nlp` 领域依赖(参考其[安装文档](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85))。
+再比如,使用 VBench 测评视频时需要安装 Detectron2,推荐安装如下分支。
+```shell
+pip install detectron2@git+https://github.com/facebookresearch/detectron2.git@b7c7f4ba82192ff06f2bbb162b9f67b00ea55867
+```
 因此如果使用沙盒过程中,这些第三方依赖抛出了一些“未找到模块(Module-Not-Found)”的报错时,用户需要先检查这些库的文档以寻求帮助。
 
 ### 准备沙盒配置文件
-沙盒的主配置文件除了Data-Juicer的配置文件外,还包括了若干额外的参数用于指定沙盒流水线中可能会运行的模型训练、推理、评测等步骤的配置信息,完整的额外参数可参考 [config_all.yaml](https://github.com/modelscope/data-juicer/blob/main/configs/config_all.yaml) 中的“for sandbox or hpo”部分参数。一个sandbox的配置文件示例可参考`configs/demo/sandbox/sandbox.yaml`:
+沙盒实验总共会依次执行四类任务:数据/模型洞察(`probe_job_configs`)、基于洞察结果的数据菜谱微调迭代(`refine_recipe_job_configs`)、数据处理与模型训练(`execution_job_configs`)和数据/模型评估(`evaluation_job_configs`)。每类任务中,任务按照配置的任务列表依次执行。每个任务需要指定:挂载这个任务的钩子(`hook`),记录中间结果的标记名(`meta_name`),Data-Juicer数据处理参数(`dj_configs`),以及该任务其他的特定参数(`extra_configs`)。这些参数中`hook`是必须指定的,其他允许置空。`dj_configs`可以参考完整的Data-Juicer数据处理参数 [config_all.yaml](https://github.com/modelscope/data-juicer/blob/main/configs/config_all.yaml)。`extra_configs`为任务特定的参数,没有限定,可以是模型训练、推理、评测等参数,比如用`path_k_sigma_recipe`指定利用k-sigma方法微调后的数据菜谱保存路径。一个sandbox的配置文件示例可参考`configs/demo/sandbox/sandbox.yaml`:
 ```yaml
-# Sandbox config example for dataset
+# Sandbox config example
 
 # global parameters
 project_name: 'demo-sandbox'
-dataset_path: './demos/data/demo-dataset.jsonl'  # path to your dataset directory or file
-np: 4  # number of subprocess to process your dataset
+experiment_name: 'demo-sandbox-run0'  # for wandb tracer name
+hpo_config: null  # path to a configuration file when using auto-HPO tool.
+
+# configs for each job, the jobs will be executed according to the order in the list
+probe_job_configs:
+  - hook: 'ProbeViaAnalyzerHook'
+    meta_name: 'analysis_ori_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+
+refine_recipe_job_configs:
+  - hook: 'RefineRecipeViaKSigmaHook'
+    meta_name: 'analysis_ori_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+      path_k_sigma_recipe: './outputs/demo-process/k_sigma_new_recipe.yaml'
+
+execution_job_configs:
+  - hook: 'ProcessDataHook'
+    meta_name:
+    dj_configs: './outputs/demo-process/k_sigma_new_recipe.yaml'
+    extra_configs:
+  - hook: 'TrainModelHook'
+    meta_name:
+    dj_configs:
+    extra_configs: 'configs/demo/sandbox/gpt3_extra_train_config.json'
+
+evaluation_job_configs:
+  - hook: 'ProbeViaAnalyzerHook'
+    meta_name: 'analysis_processed_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+  - hook: 'EvaluateDataHook'
+    meta_name: 'eval_data'
+    dj_configs:
+    extra_configs: 'configs/demo/sandbox/gpt3_data_quality_eval_config.yaml'
+```
+根据这个配置文件,sandbox 会依次:
 
-export_path: './outputs/demo-sandbox/demo-sandbox.jsonl'
+1. 先执行Data-Juicer数据分析功能,计算每条数据的指定指标,比如`configs/demo/process.yaml`中,指定`language_id_score_filter`计算了语言分。
 
-# sandbox configs
-# for refining recipe using k-sigma rules
-path_k_sigma_recipe: './outputs/demo-sandbox/k_sigma_new_recipe.yaml'
+2. 利用Data-Juicer数据分析的结果,用k-sigma方法微调数据菜谱。注意这里需要设置`meta_name`与数据分析时的`meta_name`相同才能利用到分析结果。
 
-# for gpt3 quality classifier as data evaluator
-data_eval_config: 'configs/demo/sandbox/gpt3_data_quality_eval_config.yaml'
-#data_eval_config:
-#  type: dj_text_quality_classifier
+3. 用k-sigma方法微调后的菜谱执行Data-Juicer的数据筛选功能。
 
-# for gpt3 model training
-model_train_config: 'configs/demo/sandbox/gpt3_extra_train_config.json'
+4. 用筛选后的数据训练模型。
 
-# process schedule
-# a list of several process operators with their arguments
-process:
-  - language_id_score_filter:
-      lang: 'zh'
-      min_score: 0.5
-```
-该配置文件的示例中,除了Data-Juicer数据处理相关的配置外,包含了三个额外参数:
+5. 分析筛选后的数据。
 
-- `path_k_sigma_recipe`:用于指定利用k-sigma方法微调后的数据菜谱保存路径
-- `data_eval_config`:用于指定针对数据的评测步骤的配置文件路径。也可以直接将该部分配置以字典形式添加到该字段下
-- `model_train_config`:用于指定利用处理后的数据训练模型步骤的配置文件路径
+6. 用打分器给筛选后的数据打分。
 
-额外的配置文件可以支持YAML和JSON两种格式,其内容需要根据各个步骤中各个组件具体的实现以及模型、评测支持具体定义。上面例子中涉及到的若干步骤的具体配置内容可参考对应路径下的配置文件内容。
 ### 运行沙盒
 沙盒的运行入口为`tools/sandbox_starter.py`,使用方法和数据处理与分析工具类似,需要指定沙盒配置文件:
 ```yaml
@@ -115,6 +137,7 @@ python tools/sandbox_starter.py --config configs/demo/sandbox/sandbox.yaml
 
 - **执行器(Executor)**:由于数据执行器已经由Data-Juicer的Executor承担,因此此处的执行器特指模型的执行器,包括模型训练、推理、评估等执行器。代码位于`data_juicer/core/sandbox/model_executors.py`
 - **评估器(Evaluator)**:用于对数据集或者模型进行质量以及性能的评估。代码位于`data_juicer/core/sandbox/evaluators.py`
+- **流水线钩子(Hook)**:用于将任务挂载到流水线中。代码位于`data_juicer/core/sandbox/hooks.py`
 
 ### 执行器
 模型执行器核心功能为对配置文件中指定的模型用指定的数据集进行训练、推理或评测。模型执行器需继承`BaseModelExecutor`并实现若干核心方法:
@@ -134,7 +157,7 @@ python tools/sandbox_starter.py --config configs/demo/sandbox/sandbox.yaml
 - `eval_obj`:待评估的对象
 
 用户也可根据自己的实现方式对这两个参数进行扩展使用。
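作为参考,下面给出一个自定义评估器的最小示意(其中基类名 `BaseEvaluator` 与导入路径为假设,具体接口请以 `data_juicer/core/sandbox/evaluators.py` 中的实际实现为准):
```python
# 仅为示意:基类名与导入路径为假设,请以 evaluators.py 的实际实现为准
from data_juicer.core.sandbox.evaluators import BaseEvaluator


class AvgTextLenEvaluator(BaseEvaluator):
    """用平均文本长度作为演示性的数据质量分(仅作示例)。"""

    def run(self, eval_type, eval_obj, **kwargs):
        if eval_type == 'data':
            # 这里假设 eval_obj 是包含 'text' 字段的样本列表
            lengths = [len(sample['text']) for sample in eval_obj]
            return sum(lengths) / max(len(lengths), 1)
        raise NotImplementedError(f'不支持的 eval_type:{eval_type}')
```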
-## 流水线钩子与工作列表
+## 流水线钩子
 正如章节开始部分所说,在流水线中,我们需要实现若干钩子将组件与流水线执行步骤通过工作列表连接起来。被激活的钩子会在流水线的工作列表中进行注册,然后在流水线执行时依次执行各个步骤工作列表中的钩子。四个步骤对应的工作列表分别如下:
 
 1. **数据/模型洞察**:洞察工作列表 -- probe_jobs
@@ -144,25 +167,22 @@ python tools/sandbox_starter.py --config configs/demo/sandbox/sandbox.yaml
 
 通常情况下,我们只需要为一类组件工厂实现一种钩子函数即可。而除了依赖于组件的钩子外,还有一些依赖于Data-Juicer已有功能或工具以及其他第三方库的钩子。这些钩子与依赖的组件、工具以及工作列表的对应关系如下:
 
-| 钩子 | 功能 | 依赖的组件工厂 | 依赖的工具或库 | 注册工作列表 | 触发方式(默认流水线编排) |
-| --- | --- | --- | --- | --- | --- |
-| `hook_probe_via_analyzer` | 分析与洞察数据集质量、多样性等维度分布 | - | Data-Juicer分析器Analyser | 洞察工作列表(probe_jobs)<br>评估工作列表(evaluation_jobs) | 恒定触发 |
-| `hook_probe_via_model_infer` | 分析与洞察数据集对于模型的影响,挖掘与洞察“难”数据与“脏”数据 | 模型推理工厂(ModelInferExecutorFactory) | - | 洞察工作列表(probe_jobs)<br>评估工作列表(evaluation_jobs) | sandbox配置文件中包含有效的`model_infer_config`配置参数 |
-| `hook_refine_recipe_via_k_sigma` | 根据数据集洞察结果,利用k-sigma方法对数据菜谱超参进行微调 | - | Data-Juicer超参优化工具HPO中的k-sigma菜谱微调工具 | 菜谱微调工作列表(refine_recipe_jobs) | sandbox配置文件中用path_k_sigma_recipe参数指定了微调后菜谱的保存路径 |
-| `hook_refine_recipe_via_model_feedback` | 利用模型洞察与反馈结果对数据菜谱超参进行微调 | TODO | - | 菜谱微调工作列表(refine_recipe_jobs) | sandbox配置文件中用path_model_feedback_recipe参数指定了微调后菜谱的保存路径 |
-| `hook_process_data` | 基于当前数据菜谱对数据集进行处理与清洗 | - | Data-Juicer数据处理器Executor | 执行工作列表(execution_jobs) | 恒定触发 |
-| `hook_train_model` | 基于当前数据集训练一个模型 | 模型训练工厂(ModelTrainExecutorFactory) | - | 执行工作列表(execution_jobs) | sandbox配置文件中包含有效的model_train_config配置参数 |
-| `hook_evaluate_data` | 对当前数据集进行数据质量等维度的评估 | 数据评估工厂(DataEvaluatorFactory) | - | 评估工作列表(evaluation_jobs) | sandbox配置文件中包含有效的data_eval_config配置参数 |
-| `hook_evaluate_model` | 对当前训练后的模型进行评估 | 模型评估工厂(ModelEvaluatorFactory) | - | 评估工作列表(evaluation_jobs) | sandbox配置文件中包含有效的model_eval_config配置参数 |
+| 钩子 | 功能 | 依赖的组件工厂 | 依赖的工具或库 | 注册工作列表 |
+| --- | --- | --- | --- | --- |
+| `ProbeViaAnalyzerHook` | 分析与洞察数据集质量、多样性等维度分布 | - | Data-Juicer分析器Analyzer | 洞察工作列表(probe_jobs)<br>评估工作列表(evaluation_jobs) |
+| `ProbeViaModelInferHook` | 分析与洞察数据集对于模型的影响,挖掘与洞察“难”数据与“脏”数据 | 模型推理工厂(ModelInferExecutorFactory) | - | 洞察工作列表(probe_jobs)<br>评估工作列表(evaluation_jobs) |
+| `RefineRecipeViaKSigmaHook` | 根据数据集洞察结果,利用k-sigma方法对数据菜谱超参进行微调 | - | Data-Juicer超参优化工具HPO中的k-sigma菜谱微调工具 | 菜谱微调工作列表(refine_recipe_jobs) |
+| `RefineRecipeViaModelFeedbackHook` | 利用模型洞察与反馈结果对数据菜谱超参进行微调 | TODO | - | 菜谱微调工作列表(refine_recipe_jobs) |
+| `ProcessDataHook` | 基于当前数据菜谱对数据集进行处理与清洗 | - | Data-Juicer数据处理器Executor | 执行工作列表(execution_jobs) |
+| `TrainModelHook` | 基于当前数据集训练一个模型 | 模型训练工厂(ModelTrainExecutorFactory) | - | 执行工作列表(execution_jobs) |
+| `EvaluateDataHook` | 对当前数据集进行数据质量等维度的评估 | 数据评估工厂(DataEvaluatorFactory) | - | 评估工作列表(evaluation_jobs) |
+| `EvaluateModelHook` | 对当前训练后的模型进行评估 | 模型评估工厂(ModelEvaluatorFactory) | - | 评估工作列表(evaluation_jobs) |
 
 值得注意的是,一个钩子可以在多个工作列表进行注册,因为这个钩子在不同的流水线阶段可以扮演不同的角色,比如我们可以对处理前后的数据集都进行分析,以比较数据集处理前后的质量、多样性等维度的变化情况。
 
+
 ## 自定义沙盒流水线
-除了沙盒自带的默认流水线过程,开发者还可以在`data_juicer/core/sandbox/pipelines.py`中实现自定义的流水线编排。结合前面几个章节所说,实现开发者自定义的流水线编排大致需要下面这些步骤:
+用户直接在参数配置文件中修改任务配置列表即可实现任务修改和编排。
 
-1. **实现自定义的组件**:开发者既可以基于已有的组件类别与工厂实现更多新的组件,也可以自行创建更多新类别的工厂以及其中的组件
-2. **根据自定义的组件,封装调用该组件的钩子**:可参考代码中的`hook_evaluate_data`方法,该方法调用数据评估器组件对数据集质量进行评估
-3. **将自定义的钩子注册到工作列表中**:可参考代码中的`register_default_jobs`方法,用户也可实现自定义的工作列表以及注册方法
-4. **实现自定义的流水线执行流程编排**:可参考代码中的`one_trial`方法,开发者可根据自己实现的钩子以及工作列表,定制化地调整、编排与构建流水线执行流程,实现自己的`one_trial`方法
 
 ## 监测器
 在上述章节中,反复提到“监测”这个概念。流水线会对各个步骤中产生的若干指标都进行监测,这些监测过程都是依靠沙盒监测器`SandboxWatcher`实现的。
diff --git a/docs/Sandbox.md b/docs/Sandbox.md
index bcda139e7..631b26824 100644
--- a/docs/Sandbox.md
+++ b/docs/Sandbox.md
@@ -9,51 +9,74 @@ Before using sandbox, you might need to install sandbox-related third-party dependencies by running the following command:
 ```shell
 pip install -v -e .[sandbox]
-
-pip install detectron2@git+https://github.com/facebookresearch/detectron2.git@b7c7f4ba82192ff06f2bbb162b9f67b00ea55867
 ```
 
 **NOTICE**: some sandbox-related dependencies require extra domain dependencies. For example, if users want to train an NLP model from ModelScope in the sandbox, you might need to install extra `nlp` dependencies for the `modelscope` library (see the [installation docs](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85)).
+For example, when using VBench to benchmark videos, it is necessary to install Detectron2. The following branch is recommended for installation.
+```shell
+pip install detectron2@git+https://github.com/facebookresearch/detectron2.git@b7c7f4ba82192ff06f2bbb162b9f67b00ea55867
+```
 So if some Module-Not-Found errors are raised by these third-party libraries when running the sandbox, users need to check their docs first.
 
 ### Prepare Configuration Files for Sandbox
-The configuration file of the sandbox includes several additional parameters in addition to the configuration of Data-Juicer. These parameters are used to specify the configuration information for model training, inference, evaluation, and other steps that may run in the sandbox pipeline. For the complete set of additional parameters, please refer to the "for sandbox or hpo" section in the [config_all.yaml](https://github.com/modelscope/data-juicer/blob/main/configs/config_all.yaml). An example of a sandbox configuration file can be found in `configs/demo/sandbox/sandbox.yaml`:
+The sandbox will sequentially execute four types of jobs: Data/Model Probe (`probe_job_configs`), Iterative Recipe Refinement based on Probe Results (`refine_recipe_job_configs`), Dataset Processing and Model Training (`execution_job_configs`), and Data/Model Evaluation (`evaluation_job_configs`). Within each category, jobs are carried out in the order specified by the configured job list.
+Each job requires specifying: the hook for mounting the job (`hook`), the tag name for recording intermediate results (`meta_name`), the Data-Juicer data processing parameters (`dj_configs`), and other specific parameters for the job (`extra_configs`). Among these parameters, `hook` is required, while the others may be left empty. `dj_configs` can refer to the full Data-Juicer data processing parameters available in [config_all.yaml](https://github.com/modelscope/data-juicer/blob/main/configs/config_all.yaml). The `extra_configs` are job-specific parameters without restrictions: they can include parameters for model training, inference, evaluation, etc. For example, `path_k_sigma_recipe` can be used to specify the path for saving the data recipe refined with the k-sigma method. An example of a sandbox configuration file can be found at `configs/demo/sandbox/sandbox.yaml`:
 ```yaml
-# Sandbox config example for dataset
+# Sandbox config example
 
 # global parameters
 project_name: 'demo-sandbox'
-dataset_path: './demos/data/demo-dataset.jsonl'  # path to your dataset directory or file
-np: 4  # number of subprocess to process your dataset
+experiment_name: 'demo-sandbox-run0'  # for wandb tracer name
+hpo_config: null  # path to a configuration file when using auto-HPO tool.
+
+# configs for each job, the jobs will be executed according to the order in the list
+probe_job_configs:
+  - hook: 'ProbeViaAnalyzerHook'
+    meta_name: 'analysis_ori_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+
+refine_recipe_job_configs:
+  - hook: 'RefineRecipeViaKSigmaHook'
+    meta_name: 'analysis_ori_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+      path_k_sigma_recipe: './outputs/demo-process/k_sigma_new_recipe.yaml'
+
+execution_job_configs:
+  - hook: 'ProcessDataHook'
+    meta_name:
+    dj_configs: './outputs/demo-process/k_sigma_new_recipe.yaml'
+    extra_configs:
+  - hook: 'TrainModelHook'
+    meta_name:
+    dj_configs:
+    extra_configs: 'configs/demo/sandbox/gpt3_extra_train_config.json'
+
+evaluation_job_configs:
+  - hook: 'ProbeViaAnalyzerHook'
+    meta_name: 'analysis_processed_data'
+    dj_configs: 'configs/demo/process.yaml'
+    extra_configs:
+  - hook: 'EvaluateDataHook'
+    meta_name: 'eval_data'
+    dj_configs:
+    extra_configs: 'configs/demo/sandbox/gpt3_data_quality_eval_config.yaml'
+```
+Based on this configuration file, the sandbox will:
 
-export_path: './outputs/demo-sandbox/demo-sandbox.jsonl'
+1. Execute the Data-Juicer data analysis function to calculate specified metrics for each piece of data; for example, in `configs/demo/process.yaml`, `language_id_score_filter` is designated to calculate language scores.
 
-# sandbox configs
-# for refining recipe using k-sigma rules
-path_k_sigma_recipe: './outputs/demo-sandbox/k_sigma_new_recipe.yaml'
+2. With the results from the Data-Juicer data analysis, fine-tune the data recipe using the k-sigma method (see the sketch after this list). Note that the `meta_name` here must be set the same as the `meta_name` used during data analysis to utilize the results.
 
-# for gpt3 quality classifier as data evaluator
-data_eval_config: 'configs/demo/sandbox/gpt3_data_quality_eval_config.yaml'
-#data_eval_config:
-#  type: dj_text_quality_classifier
+3. Execute Data-Juicer's data filtering function with the data recipe fine-tuned by the k-sigma method.
 
-# for gpt3 model training
-model_train_config: 'configs/demo/sandbox/gpt3_extra_train_config.json'
+4. Train the model with the filtered data.
-# process schedule
-# a list of several process operators with their arguments
-process:
-  - language_id_score_filter:
-      lang: 'zh'
-      min_score: 0.5
-```
-In the example configuration file, in addition to the Data-Juicer data processing related configurations, there are three additional parameters:
+5. Analyze the data after filtering.
 
-- `path_k_sigma_recipe`: Used to specify the save path for the refined recipe using the k-sigma method.
-- `data_eval_config`: Used to specify the configuration file path for the data evaluation step. This part of the configuration can also be directly added as a dictionary under this field.
-- `model_train_config`: Used to specify the configuration file path for training models using the processed data.
+6. Score the data after filtering with a scorer.
 
-Additional configuration files can support both YAML and JSON formats, and their contents need to be specifically defined based on the implementation of each component used in each step, as well as the models and evaluation support. The specific configuration contents of the steps involved in this example above can be referred to as configuration file contents in the corresponding path.
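To make step 2 concrete: the k-sigma rule treats stats values further than `k` standard deviations from the mean as outliers and tightens the corresponding op thresholds accordingly (`k=3` by default in `tools/hpo/execute_hpo_3sigma.py`). A rough sketch of the computation, with made-up numbers:
```python
def k_sigma_bounds(mean, std, k=3):
    """Tighten an op's threshold range to [mean - k*std, mean + k*std]."""
    return mean - k * std, mean + k * std


# e.g. refining the score thresholds of language_id_score_filter from the
# analyzer's overall stats (the numbers below are purely illustrative):
lo, hi = k_sigma_bounds(mean=0.71, std=0.12)  # -> (0.35, 1.07)
```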
 ### Start Sandbox
 The entry point for running the sandbox is `tools/sandbox_starter.py`. The usage is similar to the data processing and analysis tools: you need to specify the sandbox configuration file:
 ```yaml
@@ -115,6 +138,7 @@ Currently, components are mainly divided into two major categories:
 
 - **Executor**: Since the data executor is already handled by the Data-Juicer's Executor, the executor here specifically refers to the model executor, including model training, inference, evaluation, etc. The code is located in `data_juicer/core/sandbox/model_executors.py`.
 - **Evaluator**: Used for evaluating the quality and performance of datasets or models. The code is located in `data_juicer/core/sandbox/evaluators.py`.
+- **Hook**: Used to mount jobs onto the pipeline. The code is located in `data_juicer/core/sandbox/hooks.py`.
 
 ### Executor
 The core function of the model executor is to train, infer, or evaluate the model specified in the configuration file with the specified dataset. The model executor needs to inherit from `BaseModelExecutor` and implement several core methods:
@@ -133,7 +157,7 @@ The core function of the evaluator is to evaluate the quality and performance of the dataset or model:
 - `eval_obj`: The object to be evaluated.
 
 Users can also extend the usage of these two parameters based on their implementation.
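For illustration, a minimal custom evaluator could look like the following sketch (the `BaseEvaluator` name and import path are assumptions; check `data_juicer/core/sandbox/evaluators.py` for the actual base class):
```python
# Illustrative sketch only: the base-class name and import path are assumed.
from data_juicer.core.sandbox.evaluators import BaseEvaluator


class AvgTextLenEvaluator(BaseEvaluator):
    """Score a dataset by its average text length (a demo metric only)."""

    def run(self, eval_type, eval_obj, **kwargs):
        if eval_type == 'data':
            # eval_obj is assumed here to be a list of samples with a 'text' field
            lengths = [len(sample['text']) for sample in eval_obj]
            return sum(lengths) / max(len(lengths), 1)
        raise NotImplementedError(f'Unsupported eval_type: {eval_type}')
```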
-## Pipeline Hook and Job List
+## Pipeline Hook
 As mentioned at the start of this section, in the pipeline, we need to implement several hooks to connect components with the pipeline execution steps through the job list. Activated hooks will be registered in the pipeline's job list and then executed one by one during the pipeline execution at each step. The job lists for the four corresponding steps are as follows:
 
 1. **Data/Model Probe**: Probe job list -- probe_jobs
@@ -143,25 +167,22 @@ As mentioned at the start of this section, in the pipeline, we need to implement
 
 In general, we only need to implement one type of hook function for a type of component factory. In addition to hooks that depend on components, some hooks depend on the existing functionality and tools of Data-Juicer or other third-party libraries. The correspondence among these hooks, dependent components, tools, and job lists is as follows:
 
-| Hook | Function | Dependent Component Factory | Dependent Tool or Library | Registered Job List | Activation Method (Default Pipeline Orchestration) |
-| --- | --- | --- | --- | --- | --- |
-| `hook_probe_via_analyzer` | Analyse and probe the quality and diversity distribution of the dataset | - | Data-Juicer Analyser | - probe_jobs<br>- evaluation_jobs | Always |
-| `hook_probe_via_model_infer` | Analyze and understand the impact of the dataset on the model, explore and probe "difficult" and "dirty" data | ModelInferExecutorFactory | - | - probe_jobs<br>- evaluation_jobs | There are valid `model_infer_config` parameters in the sandbox configuration |
-| `hook_refine_recipe_via_k_sigma` | Refine data recipe hyperparameters using the k-sigma method based on the probe results of the dataset | - | k-sigma recipe refinement tool of Data-Juicer Hyperparameter Optimization (HPO) toolkit | - refine_recipe_jobs | There are valid `path_k_sigma_recipe` parameters in the sandbox configuration to specify the path to save the refined recipe |
-| `hook_refine_recipe_via_model_feedback` | Refine data recipe hyperparameters using model probe and feedback results | TODO | - | - refine_recipe_jobs | There are valid `path_model_feedback_recipe` parameters in the sandbox configuration to specify the path to save the refined recipe |
-| `hook_process_data` | Process and clean the dataset based on the current data recipe | - | Data-Juicer Executor | - execution_jobs | Always |
-| `hook_train_model` | Train a model based on the current dataset | ModelTrainExecutorFactory | - | - execution_jobs | There are valid `model_train_config` parameters in the sandbox configuration |
-| `hook_evaluate_data` | Evaluate the dataset in terms of data quality and other dimensions | DataEvaluatorFactory | - | - evaluation_jobs | There are valid `data_eval_config` parameters in the sandbox configuration |
-| `hook_evaluate_model` | Evaluate the trained model | ModelEvaluatorFactory | - | - evaluation_jobs | There are valid `model_eval_config` parameters in the sandbox configuration |
+| Hook | Function | Dependent Component Factory | Dependent Tool or Library | Registered Job List |
+| --- | --- | --- | --- | --- |
+| `ProbeViaAnalyzerHook` | Analyze and probe the quality and diversity distribution of the dataset | - | Data-Juicer Analyzer | - probe_jobs<br>- evaluation_jobs |
+| `ProbeViaModelInferHook` | Analyze and understand the impact of the dataset on the model, explore and probe "difficult" and "dirty" data | ModelInferExecutorFactory | - | - probe_jobs<br>- evaluation_jobs |
+| `RefineRecipeViaKSigmaHook` | Refine data recipe hyperparameters using the k-sigma method based on the probe results of the dataset | - | k-sigma recipe refinement tool of Data-Juicer Hyperparameter Optimization (HPO) toolkit | - refine_recipe_jobs |
+| `RefineRecipeViaModelFeedbackHook` | Refine data recipe hyperparameters using model probe and feedback results | TODO | - | - refine_recipe_jobs |
+| `ProcessDataHook` | Process and clean the dataset based on the current data recipe | - | Data-Juicer Executor | - execution_jobs |
+| `TrainModelHook` | Train a model based on the current dataset | ModelTrainExecutorFactory | - | - execution_jobs |
+| `EvaluateDataHook` | Evaluate the dataset in terms of data quality and other dimensions | DataEvaluatorFactory | - | - evaluation_jobs |
+| `EvaluateModelHook` | Evaluate the trained model | ModelEvaluatorFactory | - | - evaluation_jobs |
 
 It is worth noting that a hook can be registered in multiple job lists, as this hook can play different roles in different steps of the pipeline. For example, we can analyze and probe both the pre-processed and post-processed datasets to compare the changes in quality, diversity, and other dimensions before and after data processing.
 
+
 ## Customized Sandbox Pipeline
-In addition to the default sandbox pipeline, developers can also implement customized pipeline orchestration in `data_juicer/core/sandbox/pipelines.py`. Combining the concepts discussed in previous sections, implementing a customized pipeline orchestration by developers generally involves the following steps:
+Users can directly modify the job configuration lists in the parameter configuration file to achieve job modification and orchestration.
 
-1. **Implementing customized components**: Developers can create new components based on existing factories, or create new categories of factories and their components.
-2. **Encapsulate the hooks to call the customized components**: For example, reference the code in the method `hook_evaluate_data`, which calls the data evaluation component to evaluate the quality of datasets.
-3. **Register the customized hooks into the job list**: Developers can implement customized job lists and registration methods. Reference the code in the `register_default_jobs` method for guidance.
-4. **Implement a customized pipeline orchestration**: Based on the customized hooks and job lists, developers can customize, arrange, and build the pipeline execution process according to their specific requirements, as illustrated in the `one_trial` method in the pipeline.
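For reference, the YAML-driven orchestration above can also be sketched programmatically, mirroring what `tools/sandbox_starter.py` does after parsing (whether `SandBoxExecutor` accepts a config assembled this way, without the starter's extra validation, is an assumption):
```python
# Sketch: build a one-job sandbox config in code instead of YAML.
from jsonargparse import dict_to_namespace

from data_juicer.core.sandbox.pipelines import SandBoxExecutor

probe_job = dict_to_namespace({
    'hook': 'ProbeViaAnalyzerHook',
    'meta_name': 'analysis_ori_data',
    'dj_configs': 'configs/demo/process.yaml',
    'extra_configs': None,
})
cfg = dict_to_namespace({
    'project_name': 'demo-sandbox',
    'experiment_name': 'demo-sandbox-run1',
    'hpo_config': None,
    'refine_recipe_job_configs': [],
    'execution_job_configs': [],
    'evaluation_job_configs': [],
})
cfg.probe_job_configs = [probe_job]
SandBoxExecutor(cfg).run()
```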
 
 ## Watcher
 In the above sections, the concept of "monitoring" is repeatedly mentioned. The pipeline will monitor several metrics produced in each step, and these monitoring processes are implemented by `SandboxWatcher`.
diff --git a/tools/analyze_data.py b/tools/analyze_data.py
index a6487131b..4e100d065 100644
--- a/tools/analyze_data.py
+++ b/tools/analyze_data.py
@@ -1,12 +1,12 @@
 from loguru import logger
 
-from data_juicer.core import Analyser
+from data_juicer.core import Analyzer
 
 
 @logger.catch(reraise=True)
 def main():
-    analyser = Analyser()
-    analyser.run()
+    analyzer = Analyzer()
+    analyzer.run()
 
 
 if __name__ == '__main__':
diff --git a/tools/hpo/execute_hpo_3sigma.py b/tools/hpo/execute_hpo_3sigma.py
index 975f94114..4073fa0d9 100644
--- a/tools/hpo/execute_hpo_3sigma.py
+++ b/tools/hpo/execute_hpo_3sigma.py
@@ -1,10 +1,14 @@
 import copy
+import json
 import sys
+from argparse import Namespace
 
+import yaml
+from jsonargparse import namespace_to_dict
 from loguru import logger
 
-from data_juicer.config import export_config, init_configs
-from data_juicer.core import Analyser, Executor
+from data_juicer.config import init_configs
+from data_juicer.core import Analyzer, Executor
 from data_juicer.utils.constant import StatsKeys
 
@@ -20,9 +24,9 @@ def main():
     cfg = init_configs()
 
     logger.info('Begin to analyze data using the given initial recipe')
-    analyser = Analyser(cfg)
-    analyser.run()
-    df = analyser.overall_result
+    analyzer = Analyzer(cfg)
+    analyzer.run()
+    df = analyzer.overall_result
 
     # 2. adjust the hyper-parameters of the given recipe with k-sigma rule
     modify_recipe_k_sigma(cfg, df, path_k_sigma_recipe)
 
@@ -45,6 +49,9 @@ def modify_recipe_k_sigma(cfg, df, path_k_sigma_recipe, k=3):
     stats_key_to_std = std_series.iloc[0, :].to_dict()
     op_name_to_stats_key = StatsKeys.get_access_log(dj_cfg=cfg)
     logger.info(f'Begin to modify the recipe with {k}-sigma rule')
+    for i in range(len(cfg.process)):
+        if isinstance(cfg.process[i], Namespace):
+            cfg.process[i] = namespace_to_dict(cfg.process[i])
     for process in cfg.process:
         op_name, args = list(process.items())[0]
         temp_args = copy.deepcopy(args)
@@ -69,7 +76,18 @@ def modify_recipe_k_sigma(cfg, df, path_k_sigma_recipe, k=3):
                             f'{arg_name}={new_val}')
                         args[arg_name] = new_val
     if path_k_sigma_recipe:
-        export_config(cfg, path_k_sigma_recipe)
+        if path_k_sigma_recipe.endswith(
+                '.yaml') or path_k_sigma_recipe.endswith('.yml'):
+            with open(path_k_sigma_recipe, 'w') as fout:
+                yaml.safe_dump(namespace_to_dict(cfg), fout)
+        elif path_k_sigma_recipe.endswith('.json'):
+            with open(path_k_sigma_recipe, 'w') as fout:
+                json.dump(namespace_to_dict(cfg), fout)
+        else:
+            raise TypeError(
+                f'Unrecognized output file type:'
+                f' [{path_k_sigma_recipe}]. Should be one of the types'
+                f' [".yaml", ".yml", ".json"].')
 
 
 if __name__ == '__main__':
diff --git a/tools/sandbox_starter.py b/tools/sandbox_starter.py
index e1f8d91a4..0bb30209f 100644
--- a/tools/sandbox_starter.py
+++ b/tools/sandbox_starter.py
@@ -1,66 +1,117 @@
-import json
+from argparse import ArgumentError
+from typing import List, Union
 
-import yaml
-from jsonargparse import dict_to_namespace
+from jsonargparse import ActionConfigFile, ArgumentParser, dict_to_namespace
 from loguru import logger
 
-from data_juicer.config import init_configs
+from data_juicer.config import prepare_side_configs
 from data_juicer.core.sandbox.pipelines import SandBoxExecutor
+from data_juicer.utils.constant import JobRequiredKeys
 
 
-def prepare_side_configs(config):
-    if isinstance(config, str):
-        # config path
-        if config.endswith('.yaml') or config.endswith('.yml'):
-            with open(config) as fin:
-                config = yaml.safe_load(fin)
-            return dict_to_namespace(config)
-        elif config.endswith('.json'):
-            with open(config) as fin:
-                config = json.load(fin)
-            return dict_to_namespace(config)
-        else:
-            raise TypeError(f'Unrecognized config file type [{config}]. '
-                            f'Should be one of the types [".yaml", ".yml", '
-                            f'".json"].')
-    elif isinstance(config, dict):
-        # config dict
-        config = dict_to_namespace(config)
-        return config
-    else:
-        raise TypeError(f'Unrecognized side config type: [{type(config)}].')
-
-
-def split_configs(cfg):
+def init_sandbox_configs(args=None):
     """
-    Split train/infer/eval configs from the original config. Other configs can
-    be specified by their dict objects or config file path strings.
+    Initialize the jsonargparse parser and parse configs from one of:
+    1. POSIX-style command line args;
+    2. config files in yaml (json and jsonnet supersets);
+    3. environment variables;
+    4. hard-coded defaults.
+
+    :param args: list of params, e.g., ['--config', 'cfg.yaml'], default None.
+    :return: a global cfg object used by the Executor or Analyzer
+    """
+    parser = ArgumentParser(default_env=True, default_config_files=None)
+
+    parser.add_argument('--config',
+                        action=ActionConfigFile,
+                        help='Path to a dj basic configuration file.',
+                        required=True)
+
+    parser.add_argument('--project_name',
+                        type=str,
+                        default='hello_world',
+                        help='Name of your data process project.')
+
+    parser.add_argument('--experiment_name',
+                        type=str,
+                        default='experiment1',
+                        help='For wandb tracer name.')
+
+    parser.add_argument(
+        '--hpo_config',
+        type=str,
+        help='Path to a configuration file when using auto-HPO tool.',
+        required=False)
+
+    parser.add_argument('--probe_job_configs',
+                        type=Union[List[str], List[dict]],
+                        default=[],
+                        help='List of params for each probe job.')
+
+    parser.add_argument('--refine_recipe_job_configs',
+                        type=Union[List[str], List[dict]],
+                        default=[],
+                        help='List of params for each refine-recipe job.')
+
+    parser.add_argument('--execution_job_configs',
+                        type=Union[List[str], List[dict]],
+                        default=[],
+                        help='List of params for each execution job.')
+
+    parser.add_argument('--evaluation_job_configs',
+                        type=Union[List[str], List[dict]],
+                        default=[],
+                        help='List of params for each evaluation job.')
+
+    try:
+        cfg = parser.parse_args(args=args)
+        return cfg
+    except ArgumentError:
+        logger.error('Config initialization failed')
+
+
+def specify_job_configs(ori_config):
+
+    config = prepare_side_configs(ori_config)
+
+    for key in JobRequiredKeys:
+        if key.value not in config:
+            raise ValueError(
+                f'Need to specify param "{key.value}" in [{ori_config}]')
+
+    return dict_to_namespace(config)
+
+
+def specify_jobs_configs(cfg):
+    """
+    Specify job configs by their dict objects or config file path strings.
 
     :param cfg: the original config
-    :return: a dict of different configs.
+    :return: the config with each job config parsed and validated.
     """
-    configs = {
-        'dj_cfg': cfg,
-    }
-    if cfg.model_infer_config:
-        configs['model_infer_cfg'] = prepare_side_configs(
-            cfg.model_infer_config)
-    if cfg.model_train_config:
-        configs['model_train_cfg'] = prepare_side_configs(
-            cfg.model_train_config)
-    if cfg.data_eval_config:
-        configs['data_eval_cfg'] = prepare_side_configs(cfg.data_eval_config)
-    if cfg.model_eval_config:
-        configs['model_eval_cfg'] = prepare_side_configs(cfg.model_eval_config)
-
-    return configs
+    cfg.probe_job_configs = [
+        specify_job_configs(job_cfg) for job_cfg in cfg.probe_job_configs
+    ]
+    cfg.refine_recipe_job_configs = [
+        specify_job_configs(job_cfg)
+        for job_cfg in cfg.refine_recipe_job_configs
+    ]
+    cfg.execution_job_configs = [
+        specify_job_configs(job_cfg) for job_cfg in cfg.execution_job_configs
+    ]
+    cfg.evaluation_job_configs = [
+        specify_job_configs(job_cfg) for job_cfg in cfg.evaluation_job_configs
+    ]
+
+    return cfg
 
 
 @logger.catch
 def main():
-    cfg = init_configs()
-    configs = split_configs(cfg)
-    sandbox_executor = SandBoxExecutor(**configs)
+    cfg = init_sandbox_configs()
+    cfg = specify_jobs_configs(cfg)
+    sandbox_executor = SandBoxExecutor(cfg)
     sandbox_executor.run()
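As a quick smoke test of the config flow above (assuming the repository root is on `PYTHONPATH` so that `tools` is importable):
```python
# Sketch: exercise the starter's parsing and validation without a full run.
from tools.sandbox_starter import init_sandbox_configs, specify_jobs_configs

cfg = init_sandbox_configs(
    args=['--config', 'configs/demo/sandbox/sandbox.yaml'])
cfg = specify_jobs_configs(cfg)
# for the demo config, the first probe job mounts the ProbeViaAnalyzerHook
print([job.hook for job in cfg.probe_job_configs])
```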