diff --git a/examples/datasets/Dou-ucec-confirmatory.csv b/examples/datasets/Dou-ucec-confirmatory.csv
new file mode 100644
index 00000000..8276f70b
--- /dev/null
+++ b/examples/datasets/Dou-ucec-confirmatory.csv
@@ -0,0 +1,191 @@
+Idx,Case_id,Case_excluded,Batch,Plex,ReporterName,Aliquot_ID,Group,Discovery_study,Age,Sex,Histologic_Type,Histologic_grade,Tumor_size_cm,Height_at_time_of_surgery_cm,Weight_at_time_of_surgery_kg,BMI,Myometrial_invasion,Myometrial_invasion_present_specify,AJCC_tnm_cancer_staging_edition_used,Pathologic_staging_primary_tumor_pt,Pathologic_staging_regional_lymph_nodes_pn,Number_of_pelvic_lymph_nodes_examined,Tumor_stage_pathological,Race,CNV_ratio,CNV_status,POLE,MSIsensor_ratio,MSI_status,Genomic_subtype,Mutation_load,TP53,PTEN,CTNNB1,ARID1A,PIK3CA,xCell_Myeloid_dendritic_cell_activated,xCell_B_cell,xCell_T_cell_CD4+_memory,xCell_T_cell_CD4+_naive,xCell_T_cell_CD4+_(non-regulatory),xCell_T_cell_CD4+_central_memory,xCell_T_cell_CD4+_effector_memory,xCell_T_cell_CD8+_naive,xCell_T_cell_CD8+,xCell_T_cell_CD8+_central_memory,xCell_T_cell_CD8+_effector_memory,xCell_Class-switched_memory_B_cell,xCell_Common_lymphoid_progenitor,xCell_Common_myeloid_progenitor,xCell_Myeloid_dendritic_cell,xCell_Endothelial_cell,xCell_Eosinophil,xCell_Cancer_associated_fibroblast,xCell_Granulocyte-monocyte_progenitor,xCell_Hematopoietic_stem_cell,xCell_Macrophage,xCell_Macrophage_M1,xCell_Macrophage_M2,xCell_Mast_cell,xCell_B_cell_memory,xCell_Monocyte,xCell_B_cell_naive,xCell_Neutrophil,xCell_NK_cell,xCell_T_cell_NK,xCell_Plasmacytoid_dendritic_cell,xCell_B_cell_plasma,xCell_T_cell_gamma_delta,xCell_T_cell_CD4+_Th1,xCell_T_cell_CD4+_Th2,xCell_T_cell_regulatory_(Tregs),xCell_immune_score,xCell_stroma_score,xCell_microenvironment_score,Cibersort_B_cell_naive,Cibersort_B_cell_memory,Cibersort_B_cell_plasma,Cibersort_T_cell_CD8+,Cibersort_T_cell_CD4+_naive,Cibersort_T_cell_CD4+_memory_resting,Cibersort_T_cell_CD4+_memory_activated,Cibersort_T_cell_follicular_helper,Cibersort_T_cell_regulatory_(Tregs),Cibersort_T_cell_gamma_delta,Cibersort_NK_cell_resting,Cibersort_NK_cell_activated,Cibersort_Monocyte,Cibersort_Macrophage_M0,Cibersort_Macrophage_M1,Cibersort_Macrophage_M2,Cibersort_Myeloid_dendr
itic_cell_resting,Cibersort_Myeloid_dendritic_cell_activated,Cibersort_Mast_cell_activated,Cibersort_Mast_cell_resting,Cibersort_Eosinophil,Cibersort_Neutrophil,Mutation_signature_SBS1,Mutation_signature_SBS5,Mutation_signature_SBS6,Mutation_signature_SBS7a,Mutation_signature_SBS10a,Mutation_signature_SBS10b,Mutation_signature_SBS15,Mutation_signature_SBS20,Mutation_signature_SBS21,Mutation_signature_SBS42,Mutation_signature_SBS54,ABSOLUTE_tumor_purity,Diabetes,Metformin_treatment,Progeny_Androgen,Progeny_EGFR,Progeny_Estrogen,Progeny_Hypoxia,Progeny_JAK.STAT,Progeny_MAPK,Progeny_NFkB,Progeny_p53,Progeny_PI3K,Progeny_TGFb,Progeny_TNFa,Progeny_Trail,Progeny_VEGF,Progeny_WNT,Estimate_StromalScore,Estimate_ImmuneScore,Estimate_ESTIMATEScore,Peritoneal_ascitic_fluid,Tumor_necrosis,Margin_status,Ethnicity,Ethnicity_race_ancestry_identified,Participant_country,Tumor_site,Tumor_site_other,Tumor_focality,Number_of_pelvic_lymph_nodes_positive_for_tumor_by_ihc_staining_only,Number_of_pelvic_lymph_nodes_positive_for_tumor_by_he,Number_of_para-aortic_lymph_nodes_examined,Number_of_para-aortic_lymph_nodes_positive_for_tumor_by_ihc_staining_only,Number_of_para-aortic_lymph_nodes_positive_for_tumor_by_he,Number_of_other_lymph_nodes_examined,Number_of_other_lymph_nodes_positive_for_tumor_by_ihc_staining_only,Number_of_other_lymph_nodes_positive_for_tumor_by_he,Clinical_staging_distant_metastasis_cm,Residual_tumor,Ancillary_studies_estrogen_receptor,Ancillary_studies_progesterone_receptor,Ancillary_studies_mlh1,Ancillary_studies_msh2,Ancillary_studies_msh6,Ancillary_studies_pms2,Ancillary_studies_p53,Ancillary_studies_other_immunohistochemistry_performed,Ancillary_studies_mlh1_promoter_hypermethylation,Ancillary_studies_other_testing_performed,Donor_information_menopause_status,Donor_information_has_the_patient_ever_taken_menopausal_hormone_therapy,Donor_information_number_of_full_term_pregnancies,Ancillary_studies_other_immunohistochemistry_type_and_result,baseline_ancillary_studie
s_other_testing_type_and_result,History_of_cancer,Alcohol_consumption,Tobacco_smoking_history,Age_at_which_the_participant_started_smoking,Age_at_which_the_participant_stopped_smoking,On_the_days_participant_smoked_how_many_cigarettes_did_he_she_usually_smoke,Number_of_pack_years_smoked,Was_the_participant_exposed_to_secondhand_smoke,Personal_medical_history_history_source,medical_history_current_medications,Current_medications_history_source,Current_medications_medication_name_vitamins_supplements,Cancer_history_cancer_type,Cancer_history_history_source,Cancer_history_history_of_any_treatment,Cancer_history_medical_record_documentation_of_this_history_of_cancer_and_treatment,Procurement_blood_collection_minimum_required_blood_collected,Procurement_blood_collection_number_of_blood_tubes_collected,Follow-up_follow_up_period,Follow-up_is_this_patient_lost_to_follow-up,Follow-up_vital_status_at_date_of_last_contact,Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_last_contact,Follow-up_adjuvant_post-operative_radiation_therapy,Follow-up_adjuvant_post-operative_pharmaceutical_therapy,Follow-up_adjuvant_post-operative_immunological_therapy,Follow-up_tumor_status_at_date_of_last_contact_or_death,Follow-up_has_the_patient_ever_taken_menopausal_hormone_therapy,Follow-up_has_the_patient_ever_taken_oral_contraceptives,Follow-up_has_the_patient_ever_taken_tamoxifen,Follow-up_hypertension,Follow-up_has_the_patient_ever_been_diagnosed_with_diabetes_by_a_physician,Follow-up_number_of_full_term_pregnancies,Follow-up_has_the_patient_had_colorectal_cancer,Follow-up_measure_of_success_of_outcome_at_the_completion_of_initial_first_course_treatment,Follow-up_measure_of_success_of_outcome_at_completion_of_this_follow-up_form,Follow-up_measure_of_success_of_outcome_at_date_of_last_contact_or_death,follow-up_new_tumor_after_initial_treatment,Follow-up_type_of_new_tumor,Follow-up_site_of_new_tumor,Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_new_tu
mor_after_initial_treatment,Follow-up_additional_surgery_for_new_tumor,Follow-up_additional_treatment_radiation_therapy_for_new_tumor,Follow-up_additional_treatment_pharmaceutical_therapy_for_new_tumor,Follow-up_additional_treatment_immuno_for_new_tumor,Follow-up_days_from_date_of_collection_to_date_of_last_contact,Follow-up_cause_of_death,Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_death,Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor,Follow-up_procedure_type_of_new_tumor,Follow-up_residual_tumor_after_surgery_for_new_tumor
+C3L-00086,C3L-00086,No,b4,16,128N,CPT0092460003,Tumor,No,56,Female,Endometrioid carcinoma,G1 Well differentiated,4,165,80,29.38,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,0,CNV_L,No,10.06,MSI-H,MSI-H,415,WT,Mutated,WT,Mutated,WT,0.164143479,1.70E-02,1.22E-02,0,2.45E-19,2.16E-18,5.14E-02,9.90E-03,2.97E-02,7.16E-02,1.61E-18,1.41E-02,1.55E-02,0,1.87E-02,0,1.91E-03,0,3.60E-19,1.40E-02,1.69E-02,1.59E-02,9.57E-03,1.20E-03,1.00E-19,0,6.63E-03,0,0,5.20E-03,6.04E-02,7.04E-03,0,5.56E-02,0.17289414,0,5.69E-02,0,5.69E-02,7.44E-03,0,6.32E-02,0.201396514,0,6.89E-02,6.41E-02,0.141278353,0,0,0,3.86E-02,0,3.80E-02,7.33E-02,0.218959886,4.88E-02,0,0,3.52E-02,8.61E-04,0,18,64,151,0,0,0,0,53,0,0,0,0.44,Yes,Yes,0.210514771,0.845332306,-7.00E-02,0.631011539,-0.659031004,1.210711348,-0.330955873,8.12E-02,-0.492262958,-0.145793715,-0.40528041,0.806494585,9.75E-02,0.428632856,4018.51975,4826.094192,8844.613942,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,1,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Current smoker: Includes daily and non-daily smokers,Unknown,NA,4,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record,Metformin|Lisinopril|Amlodipine|Ibuprofen|Omeprazole,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months|60 Months,No|No|No|No|No,Living|Living|Living|Living|Deceased,330|701|1046|1436|1578,No|No|No|No|Yes,No|No|No|No|Yes,No|No|No|No|No,Tumor Free|With Tumor|With Tumor|With Tumor|With Tumor,Unknown|Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes|Yes,1|1|1|1|1,No|No|No|No|No,Not Applicable|Not Applicable|Not Applicable|Not Applicable|Not Applicable,Complete Remission|Unknown|Unknown|Persistent Disease|n/a,n/a|n/a|n/a|n/a|Persistent Disease,No|Yes|Yes|Yes|Yes,n/a|New Primary Tumor|New Primary Tumor|New Primary Tumor|New Primary Tumor,n/a|Other: Breast mets to liver and bone|Other: Breast mets to liver and bone|Other: Breast with mets to bone|Other: Breast with metastatic disease to bone,n/a|580.0|580.0|580.0|580.0,n/a|No|No|No|No,n/a|Yes|Yes|Yes|Yes,n/a|Yes|Yes|Yes|Yes,n/a|No|No|No|No,330.0|701.0|1046.0|1436.0|n/a,n/a|n/a|n/a|n/a|Breast Carcinoma,n/a|n/a|n/a|n/a|1578.0,n/a|n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a|n/a
+C3L-00898,C3L-00898,No,b4,14,128C,CPT0172200008,Tumor,No,54,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.6,152,88,37.88,Present,=50% myometrial invasion,Eighth Edition (2017),3b,0,5,IIIB,White,NA,NA,No,0,MSS,NA,42,WT,Mutated,Mutated,Mutated,Mutated,9.09E-02,0,4.85E-19,1.17E-17,0,7.74E-19,0,0,3.59E-03,1.15E-02,1.38E-18,0,0,0,2.04E-03,1.20E-02,0,0,0,4.06E-02,0,5.94E-03,5.27E-03,0,0,5.55E-18,2.08E-03,0,0,1.10E-17,3.96E-02,7.35E-03,0,0,3.68E-03,2.89E-02,3.75E-03,6.01E-03,9.76E-03,4.35E-02,0,4.62E-02,5.96E-02,0,0.276882335,0,0.143517668,0,0,0,7.72E-03,0.115580977,5.65E-02,8.74E-03,9.06E-02,0,4.94E-02,0,4.81E-02,4.21E-02,1.15E-02,19,21,0,0,0,0,0,0,0,0,0,NA,Yes,Yes,2.697956942,0.786356057,1.40E-02,-0.379503483,0.473007978,1.073747775,0.829383089,1.962050762,1.278534955,0.68325692,0.996951118,0.422786724,-0.45798663,1.994994225,4477.315071,5016.932324,9494.247395,Not Examined,Not identified,Margin(s) involved by invasive carcinoma,Hispanic or Latino,Mexican,Other: Mexico,Other,Endometrium,Multifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Metformin|Ferrous Sulfate|Flexeril|Provera,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,396|746|982|1600,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,No|No|No|No,Yes|Yes|Yes|Yes,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,396.0|746.0|982.0|1600.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-00943,C3L-00943,No,b4,15,130C,CPT0086090003,Tumor,No,63,Female,Endometrioid carcinoma,G1 Well differentiated,3,163,91,34.33,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,White,0,CNV_L,No,0.02,MSS,CNV_L,36,WT,Mutated,Mutated,WT,WT,4.55E-02,3.88E-19,6.28E-04,1.01E-19,0,0,2.97E-04,3.35E-03,1.22E-02,2.05E-02,0,3.06E-18,2.74E-03,1.26E-03,4.46E-03,5.46E-02,5.71E-06,0.130546505,0,7.51E-02,4.50E-03,7.24E-03,2.55E-03,3.30E-03,0,2.52E-19,0,6.14E-05,0,0,3.53E-02,4.31E-03,0,1.51E-02,6.47E-02,1.28E-02,1.63E-02,9.26E-02,0.108946436,3.55E-03,0,0.109157157,3.24E-02,0,0.281253761,2.94E-02,6.24E-03,0,0,0,3.24E-02,6.70E-03,6.83E-02,2.94E-02,0.182317284,2.54E-02,2.34E-02,0.119092507,1.68E-02,0,3.41E-02,8,25,0,0,0,0,0,0,0,0,0,0.28,NA,NA,0.551969114,0.585823101,0.996080842,-0.406584048,9.15E-03,0.721380215,0.380035549,0.410550463,0.177851742,1.717087546,0.586458653,0.121059206,-1.662404381,1.319793474,5918.231281,4417.462317,10335.6936,Negative for malignancy/normal/benign,Not identified,Cannot be assessed,Not reported,Medical record does not state.,Other: Medical record does not state.,Other,Lesion occupies both anterior and posterior aspects,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,1,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Levothyroxine (TIROSINT|cloNIDine HCl (CATAPRES|hydrALAZINE (APRESOLINE,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,291|747|1093,No|No|No,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Never|Never|Never,Yes|Yes|Yes,No|No|No,1|1|1,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,237.0|693.0|1039.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01064,C3L-01064,No,b3,9,129N,CPT0113430004,Tumor,No,54,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.3,160,92,35.74,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,0,Stage I,White,0,CNV_L,No,0,MSS,CNV_L,71,WT,Mutated,Mutated,Mutated,Mutated,8.01E-02,1.50E-03,1.81E-20,0,9.80E-19,0,2.63E-02,2.27E-19,0,4.92E-03,1.19E-19,0,5.51E-03,0,3.91E-03,7.34E-03,0,3.56E-02,6.98E-19,2.07E-02,0,1.94E-03,0,1.19E-03,1.17E-18,0,1.17E-03,0,3.79E-18,2.40E-02,1.86E-02,3.53E-03,0,8.26E-02,2.97E-02,2.34E-03,4.40E-03,2.23E-02,2.67E-02,1.79E-02,0,0.101475896,5.55E-02,0,0.154581642,0,6.93E-02,4.11E-02,0,0,8.48E-02,4.02E-03,0.167085811,1.93E-02,0.144941088,1.04E-03,1.05E-02,0,6.01E-02,2.74E-02,4.09E-02,25,37,0,0,0,0,0,0,0,0,0,0.56,NA,NA,0.422129433,1.432963934,1.287031128,-0.148923826,-7.13E-02,0.298775896,1.103549818,0.560907512,-0.418490806,0.12103621,1.091016887,-0.18203253,-0.326817001,0.685441273,3950.256141,4090.849852,8041.105993,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and posterior endometrium,Unifocal,0,0,0,0,0,1,0,0,Staging Incomplete,R0: No residual tumor,Positive : 90 %,Positive : 65 %,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Normal,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Current Therapy,2,NA,NA,No,"Consumed alcohol in the past, but currently a non-drinker",Current smoker: Includes daily and non-daily smokers,24,NA,20,30,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Benadryl|Estroven|Melatonin,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,524|797|1133|1518,Yes|Yes|Yes|No,Yes|Yes|Yes|Yes,No|No|No|No,With 
Tumor|Tumor Free|With Tumor|With Tumor,Former Therapy|Former Therapy|Current Therapy|Former Therapy,Former Therapy|Former Therapy|Former Therapy|Former Therapy,Never|Never|Never|Never,No|No|No|No,No|No|No|No,2|2|2|3,No|No|No|No,Persistent Disease|Persistent Disease|Persistent Disease|Persistent Disease,Persistent Disease|Persistent Disease|Persistent Disease|Persistent Disease,n/a|n/a|n/a|n/a,Yes|Yes|Yes|Yes,Distant Metastasis|Distant Metastasis|Distant Metastasis|Locoregional Recurrence,Other: omentum and vaginal cuff|Other: Omentum|Lung|Other: Recurrent to endometrium,341.0|345.0|833.0|871.0,No|No|No|No,No|Yes|No|No,Yes|Yes|Yes|Yes,No|No|No|No,453.0|726.0|1062.0|1447.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-01277,C3L-01277,No,b4,13,130N,CPT0093170003,Tumor,No,61,Female,Clear cell carcinoma,G3 Poorly differentiated,3.1,152,90,38.66,Present,<50% myometrial invasion,Seventh Edition (2010),pT4 ((FIGO IVA),pN1 (FIGO IIIC1),20,Stage IV,White,27.0318891,CNV_H,No,0.63,MSS,CNV_H,79,Mutated,WT,WT,WT,WT,6.70E-02,3.20E-02,0,7.19E-20,3.43E-18,0,1.71E-19,1.37E-03,0,0,1.85E-18,4.00E-18,3.86E-03,9.83E-20,0,1.04E-02,1.89E-18,6.68E-02,8.84E-18,0.227418213,1.63E-02,1.66E-02,7.02E-03,6.29E-03,0,0,1.94E-03,0,0,2.39E-03,0,2.56E-02,0,4.36E-02,0.169763751,0,3.64E-02,4.21E-02,7.84E-02,9.01E-02,0,9.87E-03,0.107773885,0,0.102388176,0,5.12E-02,0,0,0,7.22E-02,5.25E-02,0,3.77E-02,0.335894941,0,0,0,0.108333594,3.20E-02,0,10,45,0,18,0,0,0,0,0,0,0,0.55,NA,NA,-1.058231531,-0.210626637,6.22E-02,-0.477398904,-1.192535284,0.542375786,-0.383990391,-1.006978064,0.952184605,0.798950576,-0.432900851,-0.127951891,-1.408838516,0.58655408,5240.776464,3292.003806,8532.78027,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not reported,Medical Record Does Not State,Other: Medical Record Does Not State,Other,he mass occupies the anterior and posterior aspects of the endometrial cavity,Unifocal,IHC staining not done,3,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical Record,Lisinopril|levothyroxine,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,380|742|996,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|With Tumor,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Yes|Yes|Yes,No|No|No,3|3|3,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Persistent Disease|Persistent Disease,n/a|n/a|n/a,No|Yes|Yes,n/a|Distant Metastasis|Distant Metastasis,n/a|Other: Omental lymphadenopathy|Lung,n/a|482.0|986.0,n/a|No|No,n/a|No|Yes,n/a|Yes|No,n/a|No|No,351.0|713.0|967.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01355,C3L-01355,No,b4,16,129N,CPT0083580003,Tumor,No,55,Female,Endometrioid carcinoma,G1 Well differentiated,5,163,123,46.44,Not identified,NA,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,0.942652274,CNV_L,No,0.08,MSS,CNV_L,50,WT,Mutated,WT,Mutated,Mutated,8.62E-02,1.98E-02,1.96E-19,5.88E-19,0,0,1.91E-02,4.26E-19,0,5.35E-03,0,3.49E-03,0,1.58E-19,8.12E-03,0,3.91E-20,0,1.41E-18,5.17E-02,4.64E-02,3.94E-02,1.94E-02,4.00E-03,6.45E-19,3.77E-18,5.70E-03,6.47E-19,0,1.82E-02,1.53E-02,9.70E-03,6.38E-18,5.71E-02,4.11E-02,0,5.22E-02,0,5.22E-02,0,2.25E-02,2.93E-03,0.100605,0,0.197192035,0,0,1.23E-02,0,2.49E-02,0,1.73E-02,8.96E-02,1.77E-02,0.328690503,2.15E-02,4.21E-02,0.114054101,0,8.70E-03,0,14,32,0,0,0,0,0,0,0,0,0,0.22,NA,NA,-0.126893861,-0.984616904,0.801397286,-0.637450073,1.700052511,-1.329351105,-1.46E-02,0.472783019,0.166217987,-0.691435261,0.114622588,-0.111552549,-0.591740795,-0.469145403,3587.024284,4501.375963,8088.400247,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Caucasian,United States,Other,Anterior and Posterior Endometrium,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Medical Record|Self-reported and Medical Record|Medical Record|Medical Record,Yes,Self Report|Self Report|Self Report|Self Report|Self Report,Calcium - Vitamin D|Levothyroxine|Aspirin|Prednazole|Erythromycin,NA,NA,NA,NA,Yes,1,12 Months|24 
Months,No|Yes,Living|Living,75|75,Unknown|Unknown,Unknown|Unknown,Unknown|Unknown,Tumor Free|Tumor Free,Never|Never,Former Therapy|Former Therapy,Unknown|Unknown,No|No,No|No,2|2,No|No,Unknown|Unknown,Unknown|Unknown,n/a|n/a,Unknown|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,75.0|75.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-01599,C3L-01599,No,b3,12,129N,CPT0087260003,Tumor,No,65,Female,Endometrioid carcinoma,G2 Moderately differentiated,2.7,147,97,44.89,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,0.156405766,CNV_L,No,0,MSS,CNV_L,112,WT,Mutated,WT,Mutated,Mutated,8.49E-02,6.91E-03,0,8.61E-20,1.59E-18,0,1.85E-02,3.54E-04,0,7.28E-03,0,1.77E-02,1.30E-02,0,4.77E-03,8.40E-03,2.16E-03,3.22E-02,0,3.16E-02,5.58E-03,5.63E-03,3.95E-03,2.83E-03,4.99E-18,3.75E-03,0,9.21E-04,1.73E-18,8.33E-03,2.50E-02,4.89E-03,6.33E-18,6.74E-02,5.44E-02,1.40E-19,1.79E-02,2.13E-02,3.92E-02,1.55E-02,0,0.114405509,0,0,0.198053445,0,5.11E-02,0,0,0,6.73E-02,1.84E-02,0,1.66E-02,0.272709018,7.14E-02,3.20E-02,0,3.93E-02,3.26E-03,9.99E-02,22,56,0,0,0,0,24,0,0,0,0,0.18,Yes,No,-2.15E-02,0.426131535,0.131651293,0.394229669,0.176353543,0.315489148,0.196728148,1.291477847,0.223097873,0.276834236,0.324712921,0.870133132,-1.74E-02,0.794763495,4440.305867,4308.379012,8748.684879,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and Posterior Endometrium,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,cytotec|lipitor|lisinopril|norvasc|glucophage|alphagan|timoptic,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 
Months,No|No|No|No,Living|Living|Living|Living,343|683|1063|1434,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,343.0|683.0|1063.0|1434.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-01631,C3L-01631,No,b3,11,128C,CPT0091720003,Tumor,No,56,Female,Endometrioid carcinoma,G1 Well differentiated,6,160,99,38.67,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,6.78E-02,CNV_L,No,11.17,MSI-H,MSI-H,682,WT,Mutated,WT,Mutated,Mutated,0,3.09E-03,0,0,6.80E-20,8.66E-19,0,1.15E-20,5.91E-18,0,0,3.12E-03,1.13E-02,2.35E-18,0,1.01E-02,0,7.74E-03,0,4.83E-03,5.81E-04,3.74E-04,0,1.71E-04,0,9.46E-19,3.68E-03,1.87E-03,1.15E-17,1.07E-02,3.27E-02,0,0,0.119919416,0.102864575,0,3.81E-03,9.38E-03,1.32E-02,0,9.36E-03,0,0,0,0.223499106,0,1.32E-02,0,0,0,6.19E-02,0,0.33586025,0,7.82E-02,0,8.82E-02,0,0.133073003,0,5.67E-02,0,139,207,0,0,0,0,174,0,0,0,0.47,NA,NA,-0.341020298,1.20761473,-0.989413729,0.489077105,0.320796914,0.542830899,1.645872576,0.285063461,0.789691314,6.23E-02,1.703117126,-0.416434551,1.237731988,0.871017415,4219.507861,3832.887634,8052.395495,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both Anterior and Posterior Endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,NA,NA,No,Alcohol consumption history not available,Current smoker: Includes daily and non-daily smokers,11,NA,30,67.5,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record,Aspirin,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Deceased,414|610|798,No|No|No,No|No|No,No|No|No,Tumor Free|Tumor Free|With 
Tumor,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,No|No|No,No|No|No,3|3|3,No|No|No,Not Applicable|Not Applicable|Not Applicable,Unknown|Unknown|Patient Deceased,n/a|n/a|n/a,No|No|Yes,n/a|n/a|Locoregional Recurrence,"n/a|n/a|Other: Left iliac fossa, Left sacrum, peritoneum",n/a|n/a|762.0,n/a|n/a|No,n/a|n/a|No,n/a|n/a|No,n/a|n/a|No,414.0|610.0|n/a,n/a|n/a|Other : Endometrial adenocarcinoma,n/a|n/a|798.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01633,C3L-01633,No,b1,4,127N,CPT0091910003,Tumor,No,60,Female,Endometrioid carcinoma,G1 Well differentiated,4.5,160,143,55.86,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,0.178322095,CNV_L,No,0,MSS,CNV_L,73,WT,Mutated,WT,Mutated,WT,3.45E-02,0,0,3.39E-18,7.93E-19,1.02E-02,4.03E-02,0,3.07E-20,2.04E-02,0,7.70E-03,6.87E-03,1.99E-03,7.99E-03,3.77E-03,1.71E-02,8.06E-02,4.53E-03,4.86E-02,1.24E-03,2.28E-03,2.21E-03,5.06E-03,9.19E-20,0,9.44E-05,2.03E-19,2.26E-18,0,2.23E-02,7.18E-03,0,1.86E-02,1.47E-02,6.36E-23,2.09E-02,4.39E-02,6.48E-02,0,2.04E-02,4.91E-02,2.84E-02,0,0.352906161,0,2.18E-02,0,0,0,7.23E-02,1.62E-02,0,1.29E-02,0.240613914,4.36E-02,0,5.24E-02,0,8.46E-02,4.71E-03,25,18,22,0,0,0,0,0,0,0,0,0.21,Yes,No,0.781632785,-0.566092091,1.704101023,-0.147346941,8.50E-02,-1.034017547,-9.00E-02,0.39991664,-0.885729034,-0.166252468,8.15E-02,0.462751848,-0.929132935,4.41E-02,4592.763231,4055.100729,8647.86396,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and Posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,1,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record,Actos|Amitiza|Celexa|Flexeril|KlonoPIN|Lipitor|Mobic|Norco|sinemet|Tekturna HCT|Allopurionol|AsA 81mg,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|Yes,Living|Living,327|664,No|No,No|No,No|No,Tumor Free|Tumor Free,Unknown|Unknown,Unknown|Unknown,Never|Never,Yes|Yes,Yes|Yes,1|1,No|No,Not Applicable|Not Applicable,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,327.0|664.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-01639,C3L-01639,No,b4,13,127C,CPT0091780003,Tumor,No,71,Female,Endometrioid carcinoma,G1 Well differentiated,5,160,89,34.77,Present,<50% myometrial invasion,Seventh Edition (2010),pT3a (FIGO IIIA),pNX,0,Stage III,White,1.011907283,CNV_L,No,1.13,MSS,CNV_L,456,Mutated,Mutated,WT,Mutated,Mutated,8.90E-02,3.19E-19,2.19E-19,3.06E-19,5.73E-19,2.49E-18,4.63E-18,1.15E-18,5.82E-18,6.66E-19,0,2.94E-21,1.19E-02,6.96E-19,8.84E-04,5.88E-03,0,0,0,4.97E-02,1.70E-02,1.68E-02,2.82E-03,4.89E-20,1.61E-18,1.21E-03,1.24E-03,3.09E-03,4.75E-19,1.04E-02,2.21E-02,0,0,5.86E-02,7.04E-02,9.62E-20,1.48E-02,1.01E-02,2.49E-02,0,0,6.43E-02,1.93E-02,0,0.11618684,0,8.71E-02,5.89E-03,0,0,0.104893787,0,9.17E-02,0,0.174455326,5.31E-02,6.78E-02,0,8.66E-02,3.37E-02,9.49E-02,0,27,192,0,0,0,0,85,0,0,43,0.26,NA,NA,-0.255091243,1.579198302,0.576991577,-0.190444625,0.71782467,1.115454172,1.679047346,1.269904533,-0.498095798,9.04E-02,1.954341006,-0.554553453,0.18035603,1.015914016,4196.777011,4648.744533,8845.521543,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Anterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,4 or more,NA,NA,Yes,Lifelong non-drinker,"Current reformed smoker, more than 15 years",18,21,20,3,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,sinemet|Metoprolol 
tartrate|Prilosec|Requip|Zoloft|Zocor|ibuprofen,Breast Cancer|Skin Cancer,Medical Record|Medical Record,Surgery|Surgery,Yes|Yes,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,342|670|1060|1427,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,No|No|No|No,4 or more|4 or more|4 or more|4 or more,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,342.0|670.0|1060.0|1427.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-01664,C3L-01664,No,b2,5,127N,CPT0086490003,Tumor,No,75,Female,Endometrioid carcinoma,G2 Moderately differentiated,5.2,155,83,34.55,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,5.051972445,CNV_L,No,3.14,MSS,CNV_L,183,WT,Mutated,WT,WT,WT,7.35E-02,2.07E-02,0,0,0,7.11E-03,5.06E-02,2.54E-20,0,0,1.90E-20,2.10E-02,3.07E-02,2.76E-18,1.54E-03,3.33E-18,6.97E-03,0,3.25E-18,1.84E-03,1.42E-02,1.53E-02,4.99E-03,2.88E-03,7.48E-19,7.04E-04,0,3.61E-03,1.32E-17,2.84E-02,3.17E-02,1.88E-02,4.85E-18,5.60E-02,2.00E-02,0,3.37E-02,1.67E-18,3.37E-02,0,0,8.81E-02,9.91E-02,0,0.114647906,0,1.64E-03,2.05E-02,7.98E-02,0,0,0,0.266968512,2.71E-02,0.189947201,2.03E-02,0,0,3.27E-02,0,5.92E-02,17,50,0,0,0,0,52,26,0,0,0,0.52,NA,NA,-8.13E-02,-0.464820621,7.18E-02,1.29461329,-0.672822458,-1.092269129,0.353726316,0.281782086,-0.758120995,-1.219757543,0.613020629,0.780610055,0.434320852,-0.497425494,2086.954404,4254.848677,6341.80308,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Anterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record,"Ascorbic Acid|Aspirin 81 mg|Calcium |acetaminophen |warfarin |propafenone|polyethyl glycol-prop glycol |Hydrochlorothiazide|multiple vitamin |Folic acid-Vit -B 12|Dorzolamide |Vitamin D 1,000",NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,345|655|1071|1373,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,No|No|No|No,3|3|3|3,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,345.0|655.0|1071.0|1373.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-01668,C3L-01668,No,b2,5,131C,CPT0186560008,Tumor,No,37,Female,Endometrioid carcinoma,G2 Moderately differentiated,5,163,117,44.11,Not identified,NA,Eighth Edition (2017),pT1a,pN0,4,Stage IA,White,0,CNV_L,No,0.02,MSS,CNV_L,46,WT,Mutated,WT,Mutated,WT,7.12E-02,1.62E-02,4.69E-03,0,9.23E-18,2.16E-02,2.99E-02,1.30E-19,0,0,0,2.99E-02,5.24E-04,2.83E-18,9.36E-03,3.83E-03,1.93E-02,8.43E-18,0,2.74E-02,7.21E-20,0,1.96E-03,1.57E-02,2.28E-18,0,5.28E-03,7.87E-19,1.29E-17,2.05E-02,7.75E-02,7.18E-03,5.56E-18,0.115500556,2.24E-02,0,4.03E-02,1.92E-03,4.22E-02,1.21E-02,0,2.89E-02,0,0,0.297161092,0,1.12E-02,0,0,0,0.172118374,0,0.120838091,9.32E-03,0.186732225,3.75E-02,1.46E-02,7.29E-02,0,0,3.65E-02,2,24,0,0,0,0,13,0,0,0,0,0.47,Yes,Yes,-0.341556581,-0.957719768,0.284534034,-1.008999551,-0.137769316,-1.307441357,0.201843564,0.450185646,0.435963206,-0.825829562,0.210414014,1.549853643,-0.736410631,0.153118849,3716.933961,4389.747735,8106.681696,Not Examined,Not identified,Cannot be assessed,Not-Hispanic or Latino,American,United States,Other,Anterior and Posterior Endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Not applicable,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Former Therapy,2,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Lisinopril-HCTZ|Simvastatin|Metformin,NA,NA,NA,NA,Yes,1,12 Months,No,Living,377,No,Yes,No,Tumor 
Free,Never,Unknown,Never,Yes,Yes,None,No,Unknown,Complete Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,377,n/a,n/a,n/a,n/a,n/a
+C3L-01672,C3L-01672,No,b2,8,130C,CPT0172460008,Tumor,No,53,Female,Endometrioid carcinoma,G2 Moderately differentiated,3,163,118,44.67,Present,=50% myometrial invasion,Eighth Edition (2017),pT3a,pNX,0,IIIA,White,0.672473285,CNV_L,No,7.92,MSI-H,MSI-H,208,WT,Mutated,WT,WT,WT,1.36E-02,1.06E-02,7.97E-20,8.62E-19,1.07E-18,0,0,3.69E-19,1.60E-19,1.13E-18,3.54E-18,5.33E-19,1.43E-02,7.24E-19,1.30E-21,2.32E-03,3.52E-04,4.33E-18,0,8.50E-02,3.22E-03,1.26E-03,8.67E-03,1.17E-02,3.23E-18,5.36E-19,3.17E-03,6.05E-20,5.98E-18,2.38E-02,2.41E-02,2.09E-02,8.14E-18,3.32E-02,9.69E-02,7.38E-20,1.72E-02,1.16E-03,1.84E-02,4.84E-03,0,3.77E-02,1.03E-02,0,0.198499667,0,0,2.63E-02,0,2.11E-03,1.92E-02,2.48E-02,3.29E-03,6.71E-03,0.476896526,1.76E-02,4.10E-02,0.128732063,0,0,2.20E-03,29,32,0,0,0,19,47,23,0,0,0,0.85,Yes,Yes,-0.594531395,-0.864923509,1.451895509,0.799795961,-3.88E-02,-1.971425109,-1.824433787,-1.053802318,-0.191403293,-1.391352346,-1.807848599,-0.457897228,2.065854687,-1.625929264,1758.206873,2505.853245,4264.060118,Not Examined,Not identified,Cannot be assessed,Hispanic or Latino,Hispanic,Other: Mexico,Other,"Anterior Endomyometrium, Posterior Endomyometrium, Right and left fallopian tubes",Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Lisinopril|Hydrochlorothiazide|Metformin|amlodipine,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,353|683|1001,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Never|Never|Never,Yes|Yes|Yes,Yes|Yes|Yes,None|None|None,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,353.0|683.0|1001.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01729,C3L-01729,No,b4,16,128C,CPT0127610003,Tumor,No,60,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.8,157,63,25.24,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,1,Stage II,White,0.229568734,CNV_L,No,0.04,MSS,CNV_L,97,WT,Mutated,WT,Mutated,WT,0.103004948,1.01E-02,3.94E-19,0,3.84E-18,0,4.89E-02,0,3.44E-03,3.13E-02,0,8.39E-03,1.46E-02,0,1.94E-02,7.76E-03,4.85E-03,0.103948125,0,5.77E-02,1.38E-02,1.81E-02,4.05E-03,1.22E-04,3.70E-20,1.18E-03,1.58E-04,0,1.66E-17,2.09E-02,1.19E-02,6.00E-03,0,3.77E-02,2.94E-02,0,3.52E-02,6.43E-02,9.96E-02,9.32E-03,6.12E-04,6.06E-02,5.08E-02,0,0.209417137,0,6.88E-02,7.21E-03,0,0,5.88E-02,0,6.47E-02,4.66E-02,0.263941742,9.01E-02,1.63E-02,5.27E-02,0,0,0,35,28,0,0,0,0,16,0,0,0,0,0.2,NA,NA,0.797454727,-0.307133033,0.906066408,-0.687164104,1.341315968,-0.734403795,0.446060979,0.280807853,-1.876080099,6.14E-02,0.513159045,0.838131779,-0.338470453,-0.174036446,4539.089442,4978.0769,9517.166342,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not reported,Medical record does not state.,Other: Medical record does not state.,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,4 or more,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,NA,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,400|755,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,No|No,No|No,4 or more|4 or more,No|No,Complete Remission|Complete 
Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,371.0|726.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-01732,C3L-01732,No,b4,13,129C,CPT0099910003,Tumor,No,84,Female,Endometrioid carcinoma,G2 Moderately differentiated,8,160,59,23.2,Present,=50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN0,9,Stage II,White,2.14E-03,CNV_L,No,19.82,MSI-H,MSI-H,676,WT,Mutated,WT,Mutated,Mutated,7.66E-02,9.83E-03,0,0,0,3.96E-18,1.01E-03,0,0,5.23E-18,0,2.29E-04,2.34E-02,8.58E-18,3.84E-03,0,1.73E-18,5.42E-18,0,8.43E-03,8.65E-03,7.39E-03,3.59E-03,6.28E-03,1.76E-19,8.33E-19,4.83E-03,3.09E-19,0,1.26E-02,2.18E-02,1.56E-02,0,6.80E-02,0.125623648,0,1.91E-02,3.10E-18,1.91E-02,0,0,3.56E-03,7.59E-02,0,0.254059786,0,8.67E-02,2.00E-02,0,2.31E-03,4.13E-02,2.90E-02,6.36E-02,6.75E-02,0.221364378,6.64E-02,0,0,6.85E-02,0,0,18,98,250,0,0,0,0,0,0,0,105,0.74,NA,NA,-0.630840503,-0.159475609,0.817061802,0.940755102,-1.499832442,0.288530733,-0.698237665,-0.239938406,0.581073052,-0.971161928,-0.77449737,-1.402800452,0.459329031,-0.423072369,2658.088029,3300.90167,5958.9897,Positive for malignancy,Present,Margins uninvolved by invasive carcinoma,Not reported,Medical Record Does Not State,Other: Medical Record Does Not State,Other,umor invades 100% of endomtrial cavity,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,Cytokeratin AE1/3,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical Record,Simvastatin|Atenolol,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,387|743|1085,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Yes|Yes|Yes,No|No|No,3|3|3,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,366.0|722.0|1064.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01739,C3L-01739,No,b2,5,131N,CPT0100100003,Tumor,No,71,Female,Other,G3 Poorly differentiated,3.5,165,89,32.78,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,17,Stage I,White,1.07E-02,CNV_L,No,18.91,MSI-H,MSI-H,1308,WT,Mutated,Mutated,Mutated,Mutated,0.29888615,8.24E-02,7.48E-02,0,0,9.31E-18,3.74E-02,1.00E-02,0.128787619,0.205004105,4.01E-02,2.96E-03,7.04E-03,1.10E-18,2.07E-02,2.76E-03,8.96E-18,0,6.56E-18,0,7.04E-02,5.81E-02,1.68E-02,3.07E-03,4.54E-04,0,1.70E-02,0,5.62E-18,4.75E-19,0.161381983,1.79E-02,9.09E-03,4.77E-02,0.358099116,7.87E-02,0.203546477,1.38E-03,0.20492677,1.36E-02,0,1.57E-02,0.272960521,0,2.39E-02,0.186977988,9.58E-02,0,1.03E-02,0,7.00E-02,1.25E-03,9.02E-03,0.125837008,0.13564878,6.90E-03,1.46E-04,0,3.19E-02,0,0,9,191,513,0,0,0,0,257,0,0,0,0.41,NA,NA,-0.712886693,0.99171413,-0.908704148,0.446270786,2.199885639,0.626313982,0.899018117,8.08E-02,1.798070804,-1.357002691,0.563161503,0.162628927,0.208403811,1.06936262,4058.723808,7279.442683,11338.16649,Negative for malignancy/normal/benign,Present,Margins uninvolved by invasive carcinoma,Not reported,Medical record does not state.,Other: Medical record does not state.,Other,Tumor occupies 80% of endometrial cavity,Unifocal,IHC staining not done,0,6,IHC staining not done,0,5,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,"PAX8, HNF-1 beta component, ER, PR, P53, P16, VIMENTIN, AE1/AE3; ALL POSITIVE, IHC done on endometrial biopsy",NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical 
Record,levothyroxine|Pantoprazole (Protonix),NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,412|783|1147,No|No|No,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,No|No|No,No|No|No,2|2|2,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,385.0|756.0|1120.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01749,C3L-01749,No,b2,6,127C,CPT0127700003,Tumor,No,54,Female,Endometrioid carcinoma,G1 Well differentiated,3.5,157,76,30.54,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,2,Stage I,White,1.79E-03,CNV_L,No,0.02,MSS,CNV_L,35,WT,WT,Mutated,WT,WT,3.17E-02,0,0,3.90E-18,3.60E-19,1.11E-02,1.73E-02,5.99E-04,3.29E-03,1.59E-02,0,3.62E-18,0,0,2.85E-03,3.58E-02,0,0.212412943,1.95E-19,0.136231588,7.30E-22,8.23E-03,2.98E-05,1.31E-03,0,1.08E-19,0,1.68E-19,2.42E-18,2.61E-02,7.69E-03,0,8.64E-18,2.69E-02,1.60E-18,1.47E-02,4.97E-03,0.125751944,0.130726411,0,4.46E-03,0,5.76E-02,0,0.308827648,0,2.51E-02,5.47E-02,0,0.116206629,0,3.56E-02,4.81E-02,3.04E-02,0.160675162,3.37E-02,5.28E-03,0.119449447,0,0,0,10,22,0,0,0,0,0,0,0,0,0,0.32,NA,NA,0.648690675,0.154573407,0.594658313,-0.907043997,-0.458487295,0.280402936,-1.096688548,0.112645172,-0.720385383,1.486700506,-0.787026631,-0.436772776,-0.800584734,1.288778283,5296.512074,3618.354054,8914.866128,Negative for malignancy/normal/benign,Not identified,Cannot be assessed,Not reported,Medical Record Does Not State,Other: Medical Record Does Not State,Other,Tumor involves 50% of the anterior endometrium and 50% of the posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,NA,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,384|727|1177,No|No|No,No|No|No,No|No|No,Tumor Free|Tumor Free|With Tumor,Never|Never|Never,Former Therapy|Former Therapy|Former Therapy,Never|Never|Never,No|No|No,No|No|No,3|3|3,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Persistent Disease,n/a|n/a|n/a,No|No|Yes,n/a|n/a|Locoregional Recurrence,n/a|n/a|Other: retroperitoneal mass,n/a|n/a|987.0,n/a|n/a|No,n/a|n/a|No,n/a|n/a|Yes,n/a|n/a|No,361.0|704.0|1154.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01833,C3L-01833,No,b3,11,127N,CPT0100010003,Tumor,No,62,Female,Endometrioid carcinoma,G1 Well differentiated,3.1,157,101,40.78,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,1.65E-03,CNV_L,No,0.03,MSS,CNV_L,82,WT,Mutated,Mutated,Mutated,WT,1.83E-02,0,3.12E-19,0,0,1.20E-18,5.45E-18,0,0,4.52E-03,4.44E-18,8.17E-04,1.12E-02,2.42E-03,2.89E-03,1.18E-18,1.14E-19,7.51E-18,3.07E-18,5.84E-02,0,0,1.55E-03,1.29E-02,0,0,3.07E-03,5.86E-20,0,1.67E-02,2.55E-02,1.45E-02,0,3.80E-02,9.66E-02,9.04E-03,1.05E-02,4.43E-18,1.05E-02,1.69E-02,0,0.113675626,3.73E-02,0,0.271246307,0,0,6.00E-02,0,0,2.66E-02,0,0.213022797,1.03E-02,0.219341384,2.60E-02,0,0,0,0,5.63E-03,22,38,0,17,0,0,0,0,0,0,0,0.76,Yes,No,0.454187204,-1.476122622,0.729312363,0.169100336,-0.702773155,-1.282925204,-0.471961987,-0.995322422,0.722557203,-0.623596032,-0.427346219,-2.14E-03,-0.477063936,-0.692027789,2871.523465,2857.000839,5728.524304,Negative for malignancy/normal/benign,Present,Cannot be assessed,Not reported,Medical record does not state.,Other: Medical record does not state.,Other,anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Insulin (Novolog) |Metoprolol|Pravastatin,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,421|694|1030,Yes|Yes|Yes,No|No|No,No|No|No,Tumor 
Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Yes|Yes|Yes,Yes|Yes|Yes,3|3|3,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,376.0|649.0|985.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-01840,C3L-01840,No,b1,1,127N,CPT0091990003,Tumor,No,56,Female,Endometrioid carcinoma,G1 Well differentiated,5.5,165,103,37.83,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,7.07E-03,CNV_L,No,0.05,MSS,CNV_L,49,WT,Mutated,Mutated,Mutated,Mutated,4.54E-02,4.55E-03,2.49E-04,8.10E-19,1.53E-18,0,0,0,9.80E-21,4.24E-03,0,6.80E-19,2.54E-03,4.55E-19,0,1.25E-18,0,0,0,3.45E-03,8.19E-04,2.16E-03,2.18E-03,2.39E-03,0,1.60E-18,4.93E-03,0,9.77E-18,2.72E-02,3.71E-02,8.43E-03,0,4.31E-02,4.19E-02,3.05E-02,5.17E-03,8.18E-19,5.17E-03,6.59E-02,0,4.78E-03,0.113581244,0,6.92E-02,6.42E-03,0.180356946,0,0,0,0.139898222,1.03E-02,0.172936562,5.23E-02,9.57E-02,7.39E-02,0,0,1.25E-02,2.33E-03,0,20,25,0,0,0,0,0,0,0,0,0,0.83,Yes,No,0.118408763,3.62E-02,0.305884605,-0.45405098,-0.603255283,0.566964808,-0.127254893,0.328834416,0.374257131,-0.55463518,5.97E-02,-0.891766804,0.629323542,0.991155523,2433.141807,3410.548022,5843.689829,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both anterior and posterior,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,1,NA,NA,No,Alcohol consumption history not available,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record,Lansoprazole|Losartan,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 
Months,No|No|No|No,Living|Living|Living|Living,348|686|1048|1442,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|With Tumor,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,No|No|No|No,1|1|1|1,No|No|No|No,Not Applicable|Not Applicable|Not Applicable|Not Applicable,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Persistent Disease,No|No|No|Yes,n/a|n/a|n/a|Locoregional Recurrence,n/a|n/a|n/a|Other: Vaginal and pelvic nodes,n/a|n/a|n/a|1243.0,n/a|n/a|n/a|No,n/a|n/a|n/a|Yes,n/a|n/a|n/a|Yes,n/a|n/a|n/a|No,348.0|686.0|1048.0|1442.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-01864,C3L-01864,No,b4,13,129N,CPT0092860003,Tumor,No,80,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.5,160,73,28.51,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,4.49E-03,CNV_L,No,18.93,MSI-H,MSI-H,787,WT,Mutated,WT,Mutated,Mutated,5.84E-02,1.15E-02,1.93E-03,0,2.79E-20,0,3.67E-18,0,2.42E-03,9.39E-03,1.60E-19,1.96E-04,3.45E-02,0,4.88E-20,0,0,1.47E-17,0,1.60E-20,2.25E-02,1.45E-02,8.85E-03,4.62E-03,1.07E-19,1.85E-19,1.11E-03,0,2.81E-18,1.88E-02,3.59E-02,1.52E-02,0,4.25E-02,0.256989657,0,2.74E-02,7.53E-18,2.74E-02,0,0,0.103691799,0.112177383,0,7.57E-02,0,5.04E-02,0,0,0,7.12E-02,0,3.93E-02,0.109020366,0.297022682,5.96E-03,1.20E-02,0,0.104741705,0,1.89E-02,14,99,244,0,0,0,0,135,0,0,59,0.86,NA,NA,-0.66907287,0.743923923,1.031585214,0.176813757,1.285171964,0.703753678,0.371534146,-0.605994245,0.492592676,-0.881873324,0.428918864,-0.86456841,1.141146436,-1.205182171,2613.250206,3552.002805,6165.253011,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,4 or more,NA,NA,No,Alcohol consumption history not available,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record,Cholecalciferol|Hydrochlorothiazide,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|Yes,Living|Living,390|392,No|No,No|No,No|No,Tumor Free|Unknown Tumor 
Status,Unknown|Unknown,Unknown|Unknown,Unknown|Unknown,Yes|Yes,No|No,4 or more|4 or more,No|No,Not Applicable|Not Applicable,Complete Remission|Unknown,n/a|n/a,No|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,390.0|392.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-01913,C3L-01913,No,b2,6,128C,CPT0092690003,Tumor,No,55,Female,Endometrioid carcinoma,Other: High grade,4,160,67,26.17,Present,=50% myometrial invasion,Eighth Edition (2017),pT1b,pN1,23,Stage IIIC1,White,2.70724875,CNV_L,Yes,0.17,MSS,POLE,18840,Mutated,Mutated,Mutated,Mutated,Mutated,0.117666086,1.28E-02,1.29E-02,0,3.80E-19,0,1.04E-02,1.14E-02,7.41E-03,2.46E-02,1.06E-18,2.82E-03,3.55E-02,3.07E-18,2.01E-03,2.97E-19,0,0,8.31E-19,2.21E-02,1.80E-02,1.07E-02,1.24E-02,2.29E-03,0,4.42E-18,4.79E-03,0,1.09E-17,4.91E-04,3.02E-02,3.75E-03,0,2.51E-02,0.235016335,1.13E-18,2.83E-02,1.48E-19,2.83E-02,0,2.71E-02,7.69E-03,0.125054052,0,0.198178282,0,0,2.01E-02,0,0,2.24E-02,2.24E-02,0.117390967,0.115701185,0.226384973,2.96E-02,0,7.80E-02,0,1.02E-02,0,2089,7336,0,0,4224,5063,0,0,0,0,0,0.59,NA,NA,-1.020872973,-0.703173935,-0.628478858,0.425288773,8.13E-02,-0.455588047,-0.512797008,-0.424064354,0.479794412,-0.336293525,-0.778982613,0.542219472,-0.19937804,0.399632053,3921.613839,4160.530048,8082.143888,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both anterior and posterior,Unifocal,IHC staining not done,1,4,IHC staining not done,0,1,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,4 or more,NA,NA,No,Alcohol consumption history not available,Current smoker: Includes daily and non-daily smokers,33,NA,20,22,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 
Months,No|No|No|No,Living|Living|Living|Living,369|694|1052|1422,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Former Therapy|Former Therapy|Former Therapy|Former Therapy,Unknown|Unknown|Unknown|Unknown,No|No|No|No,No|No|No|No,4 or more|4 or more|4 or more|4 or more,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,369.0|694.0|1052.0|1422.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-01967,C3L-01967,No,b1,4,128N,CPT0108750003,Tumor,No,65,Female,Endometrioid carcinoma,G1 Well differentiated,6,164,100,37.18,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,8,Stage I,Not Reported,1.73E-02,CNV_L,No,0.02,MSS,CNV_L,165,WT,WT,WT,WT,WT,0.31592033,0,2.05E-02,2.22E-18,6.38E-19,0,0.100189533,8.00E-03,9.92E-02,0.124124935,2.89E-02,0,4.34E-03,3.56E-19,5.92E-02,6.88E-02,3.62E-03,0.345642662,6.86E-02,0.209937657,4.07E-02,3.65E-02,2.43E-02,6.77E-04,3.29E-18,3.14E-02,0,2.97E-04,3.63E-18,1.70E-03,5.10E-02,8.58E-05,0,4.22E-18,0.19494426,1.85E-21,0.156717472,0.218004461,0.374721933,1.33E-02,0,3.14E-02,0.18246362,0,0.180507056,3.69E-02,3.59E-02,0,1.88E-02,0,5.17E-02,7.32E-04,0,7.61E-02,0.241358829,1.92E-02,1.45E-02,5.57E-02,0,2.67E-02,1.48E-02,25,48,0,0,0,0,0,0,31,0,42,0.16,NA,NA,0.536843247,0.113003379,0.542891843,-0.845478311,2.148028633,-5.41E-02,0.869400448,-0.399336233,-0.451862202,2.176888745,0.825817444,1.298364714,-0.96120472,0.422316303,8031.335145,7301.581729,15332.91687,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not reported,Unknown,United States,Other,Both anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,4 or more,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Yes,Self Report|Self Report|Medical Record|Self Report|Self Report|Self Report|Self Report|Self Report|Self Report|Self Report,Yes,Self Report|Self 
Report|Self Report,Proair|Vitamins|Loratadine,NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,0,Unknown,Unknown,Unknown,Unknown Tumor Status,Unknown,Unknown,Unknown,No,No,4 or more,No,Unknown,Unknown,n/a,Unknown,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a
+C3L-02119,C3L-02119,No,b1,3,129N,CPT0114340003,Tumor,No,72,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.3,170,110,38.06,Not identified,NA,Seventh Edition (2010),pT1 (FIGO I),pN0,6,Stage I,White,1.516079337,CNV_L,No,0.75,MSS,CNV_L,98,WT,Mutated,WT,Mutated,Mutated,4.04E-02,8.31E-03,2.59E-19,0,5.06E-19,2.41E-03,1.88E-02,2.52E-19,0,1.39E-02,0,5.06E-03,3.35E-03,0,2.46E-02,5.93E-03,7.78E-03,3.36E-02,0,1.32E-02,3.43E-02,3.31E-02,1.59E-02,4.76E-03,0,3.87E-18,3.89E-03,0,1.63E-19,2.78E-02,2.79E-02,4.10E-03,1.60E-18,6.58E-02,1.11E-02,4.61E-03,5.32E-02,1.98E-02,7.30E-02,0,0,6.43E-03,0.152587534,0,0.208113076,0,1.33E-02,9.69E-03,0,5.39E-02,0,4.36E-02,0.105875785,1.90E-02,0.327131538,3.54E-03,1.48E-02,1.92E-02,0,2.28E-02,0,12,33,31,0,0,0,0,0,0,0,0,0.5,NA,NA,-0.476873862,-0.263096241,-0.257920119,-8.61E-02,-0.811554349,-0.685522975,0.23483925,-7.33E-02,-0.851782968,-0.444961258,0.290148182,0.963318943,0.137828906,0.568796003,3653.131273,4716.283529,8369.414802,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Caucasian,United States,Posterior endometrium,NA,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,Yes,Absent,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,"Napsin A, Pancytokeratin - both negative",NA,No,Alcohol consumption history not available,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record,"Atorvastatin (Lipitor)|Cholecalciferol, Vitamin D3",NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|No,Living|Living,342|567,No|No,No|No,No|No,Tumor Free|Tumor 
Free,Unknown|Unknown,Unknown|Unknown,Never|Never,Yes|Yes,No|No,None|None,No|No,Complete Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,323.0|548.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02121,C3L-02121,No,b3,9,130N,CPT0114100003,Tumor,No,39,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.8,175,105,34.11,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,2.17E-02,CNV_L,No,0.08,MSS,CNV_L,44,WT,Mutated,Mutated,Mutated,WT,3.86E-02,5.71E-03,9.99E-19,0,1.14E-17,0,2.38E-18,1.92E-20,0,3.01E-18,1.52E-17,5.84E-04,1.01E-02,0,0,8.46E-20,9.51E-19,0,2.21E-18,0,0,0,2.50E-03,9.63E-03,0,7.86E-18,9.76E-21,1.58E-18,8.67E-18,2.13E-02,5.75E-03,1.04E-02,0,0.152446734,0.116417636,0,1.02E-02,3.73E-19,1.02E-02,8.48E-02,0,7.33E-02,4.60E-02,0,0.235766049,0,2.99E-02,4.80E-02,0,0,0.106362312,1.67E-02,0.113006634,3.73E-02,0.163206174,3.30E-02,1.25E-02,0,0,0,0,7,16,0,0,3,3,12,0,0,0,0,0.73,NA,NA,-0.16890428,-0.399057258,0.184331649,0.162518682,-1.380936812,-1.127871458,-0.524421057,-0.456343557,0.314030403,-1.442001644,-0.483844297,-1.74904298,-3.72E-02,-0.779539053,2282.601855,2822.843276,5105.445131,Not Examined,Present,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Caucasian,United States,Other,Both anterior and posterior,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Premenopausal: less than 6 months since LMP AND no prior bilateral oophorectomy AND not on estrogen replacement,Never,None,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,1,12 Months|24 Months,Yes|Yes,Living|Living,417|417,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,No|No,No|No,None|None,No|No,Complete Remission|Complete Remission,Complete 
Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,170.0|170.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02125,C3L-02125,No,b2,8,131C,CPT0197250003,Tumor,No,68,Female,Serous carcinoma,G3 Poorly differentiated,7,165,80,29.45,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,4,Stage III,White,38.83879719,CNV_H,No,1.35,MSS,CNV_H,82,Mutated,WT,WT,WT,Mutated,0.132382395,7.67E-03,2.39E-03,2.65E-19,4.45E-19,8.81E-18,7.59E-18,4.02E-03,0,0,0,0,4.50E-02,0,2.33E-03,0,1.08E-18,3.41E-03,0,1.55E-02,1.71E-02,2.23E-02,1.64E-19,4.32E-03,0,5.72E-19,1.90E-21,1.09E-19,0,2.44E-02,3.79E-02,4.40E-03,1.62E-18,0.106546166,0.228666305,0,2.09E-02,2.67E-03,2.36E-02,4.86E-02,0,7.92E-02,4.82E-02,0,2.96E-02,0,5.75E-02,0,1.64E-02,0,0.121450749,0,0.190494979,0.1760159,0.183873163,0,3.41E-02,0,8.34E-03,6.11E-03,0,14,64,0,0,0,0,0,0,0,0,0,0.47,NA,NA,-1.810015984,0.925194903,-0.19338274,0.296579574,1.628249652,1.562211057,0.741314062,-1.189854714,2.562004207,0.834973613,0.76376946,-8.92E-02,-1.247598295,0.793274634,3640.161195,3809.917664,7450.078858,Not Examined,Present,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Caucasian,United States,Other,Both anterior and posterior,Multifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Negative,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Overexpression,Yes,Cannot be determined,Yes,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,1,PAX8 - Positive,"P16-positive, Pancytokeratin - no keratin positive metastatic cells identified, GATA-3 - negative",No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record,Yes,Medical Record|Medical Record,enalapril (VASOTEC) |traMADol (ULTRAM),NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,410,Yes,Yes,No,Unknown Tumor Status,Unknown,Unknown,Unknown,Yes,No,1,No,Complete Remission,Complete 
Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,410,n/a,n/a,n/a,n/a,n/a
+C3L-02216,C3L-02216,No,b2,6,127N,CPT0104690003,Tumor,No,68,Female,Endometrioid carcinoma,G1 Well differentiated,4.5,160,87,33.98,Present,=50% myometrial invasion,Eighth Edition (2017),pT1b,pN1,18,Stage IIIC1,White,0,CNV_L,No,17.11,MSI-H,MSI-H,854,WT,Mutated,WT,Mutated,Mutated,3.15E-04,3.86E-19,6.03E-19,0,1.16E-17,7.77E-19,4.19E-04,1.61E-03,0,2.08E-19,6.58E-18,0,1.74E-02,0,4.10E-03,8.11E-03,0,0,0,2.55E-02,6.31E-03,2.78E-03,8.70E-03,2.72E-03,3.28E-18,0,2.54E-21,3.38E-19,9.06E-18,7.69E-03,1.92E-02,8.16E-03,0,9.48E-02,6.60E-02,0,8.75E-03,4.06E-03,1.28E-02,1.72E-02,0,9.74E-02,4.99E-02,0,0.206528078,0,1.86E-02,0,0,2.04E-03,4.16E-02,9.62E-03,0,1.37E-02,0.354504454,0.115254986,0,7.36E-02,0,0,0,74,345,0,0,0,0,212,0,0,0,0,0.74,NA,NA,-0.635187668,-0.45532401,0.485969702,0.81716412,-1.391959106,-0.859056651,-1.94398288,0.803064702,-0.333173048,-0.870820866,-1.828703436,-0.954724608,0.267231971,0.140468678,3182.120029,2670.093058,5852.213087,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and Posterior,Unifocal,IHC staining not done,1,4,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,NA,NA,Yes,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record,gabapentin|ambien|plaquenil|tenormin|methotrexate|amLODIPine|AMitriptyline,Breast,Medical Record,Surgery,Yes,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,346|697|1036|1432,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,No|No|No|No,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,346.0|697.0|1036.0|1432.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02347,C3L-02347,No,b2,8,128C,CPT0113850003,Tumor,No,62,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.5,165,112,41.14,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,0,CNV_L,No,9.76,MSI-H,MSI-H,331,WT,Mutated,WT,Mutated,Mutated,0.149283082,1.58E-02,1.02E-19,4.65E-20,7.30E-19,0,4.86E-02,0,2.97E-04,1.44E-02,0,5.17E-03,8.21E-03,0,9.94E-03,1.77E-03,0,6.74E-18,1.03E-18,1.28E-02,1.34E-02,1.26E-02,6.75E-03,2.85E-03,8.15E-05,0,2.29E-03,3.26E-19,1.20E-17,2.95E-02,1.72E-02,1.42E-02,0,9.94E-02,1.30E-02,3.01E-20,2.82E-02,8.85E-04,2.91E-02,3.01E-03,0,8.17E-02,7.36E-02,0,0.212249015,0,6.21E-02,2.04E-02,0,0,2.10E-02,0,0.244709493,5.31E-02,0.194216922,1.92E-02,0,0,2.03E-03,0,1.27E-02,26,61,55,0,0,0,81,0,0,0,0,0.71,Yes,No,-0.50666458,0.61431738,-2.43E-03,0.59694964,3.43E-02,0.112406606,0.345508127,-0.189280113,-0.545306974,-0.157317202,0.365770604,-8.13E-02,-0.328146903,0.424965714,3841.019296,4699.493375,8540.512671,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,NA,NA,No,Alcohol consumption history not available,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,"amiodarone (PACERONE)|amLODIPine (NORVASC)|aspirin | buPROPion (WELLBUTRIN SR) |carvedilol (COREG)|Cholecalciferol (VITAMIN D3)| ELIQUIS|ferrous sulfate|ferrous sulfate|gabapentin (NEURONTIN)|glipiZIDE (GLUCOTROL)|HUMULIN N & R|KLOR-CON |levothyroxine (SYNTHROID)|lovastatin (MEVACOR)|montelukast (SINGULAIR)|sertraline (ZOLOFT)|valsartan-hydrochlorothiazide (DIOVAN-HCT)",NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,323|696|1064|1438,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,3|3|3|3,No|No|No|No,Not Applicable|Not Applicable|Not Applicable|Not Applicable,Complete Remission|Complete Remission|Unknown|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,323.0|696.0|1064.0|1438.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02353,C3L-02353,No,b4,15,129C,CPT0112780003,Tumor,No,80,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,155,75,31.22,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,29,Stage I,White,6.15E-03,CNV_L,No,1.89,MSS,CNV_L,257,WT,Mutated,WT,WT,WT,0.234538444,2.71E-02,1.38E-02,3.58E-18,7.67E-04,3.37E-18,0.11078579,5.46E-03,1.59E-02,5.86E-02,2.30E-19,5.35E-03,2.07E-02,0,3.31E-02,1.25E-02,8.69E-19,6.68E-03,4.42E-18,1.82E-02,6.60E-02,5.49E-02,2.56E-02,8.04E-03,3.09E-20,1.14E-02,3.49E-03,0,2.62E-19,3.64E-02,7.21E-02,2.62E-03,0,7.04E-02,0.149000429,5.15E-03,0.108182427,1.02E-02,0.118383664,0,0,2.69E-02,7.89E-02,0,0.111091904,1.39E-02,5.48E-02,2.65E-02,3.07E-02,0,5.22E-02,0,0.203734855,6.47E-02,0.27981557,2.09E-02,0,2.43E-02,0,0,1.16E-02,29,70,0,0,0,0,74,0,0,0,19,0.29,NA,NA,-0.535372076,0.600107513,3.155340654,4.16E-02,-0.12352169,-0.722810459,0.678545274,-0.4187511,-0.339408721,0.4957856,0.534735804,0.803664177,8.78E-02,-0.518905112,5771.257457,6399.411201,12170.66866,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Posterior endometrium,NA,Unifocal,IHC staining not done,0,6,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,4 or more,NA,NA,Yes,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical 
Record|Medical Record|Medical Record,Prilosec |aleve|Multiple vitamin|Enoxaparin sodium |Motrin,Melanoma,Medical Record,Surgery,No,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,381|564|954|1359,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,No|No|No|No,No|No|No|No,4 or more|4 or more|4 or more|4 or more,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,381.0|564.0|954.0|1359.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02354,C3L-02354,No,b3,9,127C,CPT0112910003,Tumor,No,55,Female,Endometrioid carcinoma,G1 Well differentiated,4.1,165,80,29.38,Present,=50% myometrial invasion,Seventh Edition (2010),pT3a (FIGO IIIA),pN0,9,Stage IV,White,0,CNV_L,No,0.01,MSS,CNV_L,39,WT,WT,Mutated,Mutated,WT,8.13E-02,6.42E-03,2.79E-03,0,0,0,2.68E-02,1.63E-21,2.19E-03,1.19E-02,1.28E-18,0,5.95E-03,9.25E-19,0,5.63E-18,0,1.04E-17,2.02E-18,1.47E-02,4.00E-03,9.77E-03,0,6.00E-04,0,4.65E-18,3.60E-03,3.71E-19,0,3.68E-02,5.80E-02,1.08E-02,0,4.78E-02,9.11E-03,4.96E-03,8.81E-03,8.03E-18,8.81E-03,0,0,1.01E-02,8.54E-03,0,0.38078971,0,3.62E-02,0,5.02E-02,0,4.04E-02,0,0.142753811,0.110422357,0.183556076,3.86E-03,0,3.31E-02,0,0,0,15,13,0,0,0,0,0,7,0,0,0,0.74,NA,NA,1.362300799,-0.607768574,-0.522481165,-0.79104118,0.73534036,-8.33E-02,-0.738387863,0.834938257,0.147847122,-8.77E-02,-0.492512274,-1.419153354,0.430372758,1.736080971,2798.087024,3782.760715,6580.847739,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and Posterior,Unifocal,IHC staining not done,0,4,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R1: Microscopic residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,Medical Record,multivitamin,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,327|670|1050|1421,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Unknown Tumor Status|Tumor Free|Tumor Free|With 
Tumor,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Current Therapy|Former Therapy|Former Therapy|Former Therapy,No|No|No|No,No|No|No|No,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Persistent Disease,No|No|No|Yes,n/a|n/a|n/a|Locoregional Recurrence,n/a|n/a|n/a|Other: Right pericolic gutter mass,n/a|n/a|n/a|1294.0,n/a|n/a|n/a|Yes,n/a|n/a|n/a|No,n/a|n/a|n/a|Yes,n/a|n/a|n/a|No,327.0|670.0|1050.0|1421.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|1335.0,n/a|n/a|n/a|Surgical Resection,n/a|n/a|n/a|R0: No Residual Tumor
+C3L-02357,C3L-02357,No,b1,1,128N,CPT0113330003,Tumor,No,54,Female,Endometrioid carcinoma,G2 Moderately differentiated,6,157,106,43,Not identified,NA,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,White,2.510041587,CNV_L,No,0.05,MSS,CNV_L,89,WT,Mutated,WT,Mutated,Mutated,0.126444941,1.33E-02,2.30E-20,0,3.69E-18,1.09E-18,8.40E-02,0,1.06E-18,2.23E-02,0,7.06E-03,0,4.22E-19,3.25E-02,6.13E-03,9.29E-21,1.24E-02,0,2.68E-02,4.55E-02,2.51E-02,3.78E-02,3.34E-03,0,1.78E-02,2.75E-03,2.32E-03,2.85E-18,3.28E-02,2.21E-02,4.44E-03,0,5.62E-02,5.29E-18,0,7.65E-02,9.27E-03,8.58E-02,0,0,0,5.60E-02,0,0.227136013,0,3.61E-02,1.35E-02,0,0,4.91E-02,5.35E-03,0.112874235,3.22E-02,0.254988031,0.144074465,0,0,0,1.87E-02,4.99E-02,26,31,0,0,0,0,0,14,0,0,0,0.18,NA,NA,-0.124245636,9.25E-02,0.77034754,-0.48269097,4.04E-02,-1.113593164,1.02423415,0.333115793,-0.805252687,-0.400049712,1.235153615,1.039434126,0.467279941,0.500780463,4844.001649,5840.577918,10684.57957,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and Posterior,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,NA,NA,No,Lifelong non-drinker,Current smoker: Includes daily and non-daily smokers,15,NA,30,58.5,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,albuterol (PROAIR HFA)|Aspirin|Motrin|Omeprazole,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|Yes,Living|Living,349|349,No|No,No|No,No|No,Tumor Free|Tumor 
Free,Unknown|Unknown,Unknown|Unknown,Never|Never,No|No,No|No,2|2,No|No,Not Applicable|Not Applicable,Complete Remission|Complete Remission,n/a|n/a,No|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,349.0|349.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02382,C3L-02382,No,b4,14,130C,CPT0190700008,Tumor,No,64,Female,Endometrioid carcinoma,G2 Moderately differentiated,2.5,168,107,37.93,Present,<50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN1 (FIGO IIIC1),3,Stage III,White,5.17E-02,CNV_L,No,18.03,MSI-H,MSI-H,581,WT,Mutated,WT,WT,WT,7.43E-02,1.07E-02,0,0,0,0,1.53E-02,1.10E-19,2.94E-19,0,3.13E-18,1.04E-02,1.82E-02,1.63E-18,6.80E-03,3.02E-03,0,0,0,2.24E-02,1.01E-02,9.23E-03,4.74E-03,1.15E-04,3.06E-18,0,5.35E-03,2.81E-03,4.37E-18,1.72E-19,2.93E-02,1.49E-03,0,6.12E-02,2.18E-02,0,2.03E-02,1.51E-03,2.18E-02,0,0,1.37E-04,0,0,0.373585701,0,9.70E-03,0,0,5.05E-02,0,0,0.116044006,2.37E-02,0.168779456,3.61E-02,5.46E-02,0,6.21E-02,0,0.104740638,15,52,191,0,0,0,0,75,0,0,64,0.27,NA,NA,-0.159240424,1.725697304,8.39E-02,0.402260498,0.284473709,2.096147864,1.42419516,1.032923537,0.7880705,-0.262146321,1.582532969,-0.460392857,0.971265423,1.906322016,3493.014013,4634.052626,8127.066638,Negative for malignancy/normal/benign,Present,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,white,United States,Anterior endometrium,NA,Unifocal,1,0,0,0,0,0,0,0,cM0,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,Unknown,NA,NA,No,"Consumed alcohol in the past, but currently a non-drinker",Current smoker: Includes daily and non-daily smokers,25,NA,Unknown,NA,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,aspirin|atorvastatin|vitamin D3|fenofibrate micronized|ferrous sulfate|furosemide|metoprolol 
tartrate|pantoprazole|potassium chloride,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,360|815|1088,No|No|No,Yes|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Former Therapy|Former Therapy|Former Therapy,Never|Never|Never,Yes|Yes|Yes,No|No|No,None|None|None,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,291.0|746.0|1019.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-02384,C3L-02384,No,b2,5,127C,CPT0127980003,Tumor,No,81,Female,Serous carcinoma,G3 Poorly differentiated,2.1,163,58,21.8,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,15,Stage II,White,22.20816427,CNV_H,No,0.49,MSS,CNV_H,74,Mutated,WT,WT,WT,WT,0.150800748,5.60E-02,0,2.53E-18,0,1.15E-02,2.83E-02,9.81E-03,1.37E-02,4.02E-03,6.35E-03,0,6.74E-03,1.78E-18,4.08E-03,0,0,1.60E-02,8.14E-20,0,1.14E-02,2.08E-02,1.04E-03,4.34E-03,6.53E-03,0,2.32E-03,0,0,7.25E-03,2.19E-02,4.29E-02,0,0.138764484,7.83E-02,4.20E-18,5.97E-02,9.03E-03,6.87E-02,7.08E-02,0,0.105325826,0.155680228,0,8.38E-02,0,8.97E-02,0,6.11E-03,0,7.39E-02,0,6.47E-02,0.127723474,0.222159372,0,0,0,0,0,0,10,61,0,0,0,0,0,0,0,0,0,0.2,NA,NA,-1.173315254,0.38104993,-0.339323801,0.228006172,1.453802478,0.352534871,7.52E-02,-0.134404946,-0.872854207,0.405507519,6.20E-02,-0.230264388,-1.223733226,-0.272659945,3592.368124,4176.960813,7769.328937,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not reported,Medical record does not state.,Other: Medical record does not state.,Other,25% anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical Record,DULoxetine (CYMBALTA|memantine (NAMENDA XR),NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,372|725|907,No|No|No,No|No|No,No|No|No,Tumor Free|Tumor 
Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,No|No|No,No|No|No,2|2|2,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,326.0|679.0|861.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-02389,C3L-02389,No,b3,11,129N,CPT0203040008,Tumor,No,66,Female,Endometrioid carcinoma,G2 Moderately differentiated,5.5,155,103,42.89,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,13,Stage I,White,5.069920133,CNV_L,No,22.34,MSI-H,MSI-H,792,WT,Mutated,WT,Mutated,WT,0.115941548,2.34E-02,7.30E-21,0,4.85E-18,5.69E-18,4.25E-19,1.10E-18,0,1.29E-02,3.23E-18,9.64E-03,2.35E-02,4.70E-18,3.06E-03,1.60E-19,1.80E-19,1.00E-17,0,8.77E-03,1.71E-02,9.21E-03,8.87E-03,9.11E-03,2.27E-19,0,7.71E-03,5.70E-19,2.45E-17,2.14E-03,2.70E-02,1.56E-02,0,7.88E-03,0.236543285,1.79E-18,3.51E-02,5.10E-18,3.51E-02,0,0,0.115006357,0.105195773,0,0.12577489,1.02E-02,0.115195434,0,0,0,7.53E-02,0,5.33E-02,5.15E-02,0.263459463,0,1.53E-02,0,2.45E-02,2.40E-02,2.14E-02,22,134,155,0,0,0,137,122,0,0,0,0.84,NA,NA,-1.490880237,0.334990236,-2.305200076,1.509563344,-9.19E-03,0.718692115,0.178814955,-6.32E-02,1.24029417,-2.023194016,-1.68E-02,-0.602970907,1.296375935,-1.082169211,2125.608228,4362.302181,6487.910409,Positive for malignancy,Not identified,Margins uninvolved by invasive carcinoma,Not reported,Medical record does not state.,Other: Medical record does not state.,Anterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,WT1 negative,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record,Yes,Medical Record|Medical Record|Medical Record,lisinopril 
(PRINIVIL|hydrochlorthiazide|GLUCOSAMINE,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,412|778,No|No,Yes|Yes,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,None|None,No|No,Complete Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,351.0|717.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02392,C3L-02392,No,b3,10,127N,CPT0203250008,Tumor,No,58,Female,Endometrioid carcinoma,G1 Well differentiated,1,173,60,19.99,Present,<50% myometrial invasion,Seventh Edition (2010),pT1 (FIGO I),pN0,14,Stage I,White,0,CNV_L,No,0,MSS,CNV_L,42,WT,Mutated,WT,Mutated,Mutated,0.119162713,1.14E-02,0,0,2.16E-18,9.70E-18,0,0,0,3.00E-02,0,1.70E-19,8.61E-03,2.45E-18,2.22E-03,3.87E-02,3.22E-19,8.67E-02,0,7.24E-02,3.67E-02,2.02E-02,2.91E-02,8.95E-04,0,0,9.67E-03,3.80E-03,8.39E-18,8.41E-19,0.115187762,1.16E-02,0,1.33E-02,1.80E-02,9.42E-19,3.67E-02,6.36E-02,0.100282353,7.92E-02,0,1.46E-02,0,0,0.297421448,0,4.48E-02,0,0,7.14E-03,0.153642558,2.55E-02,6.69E-02,0,0.150684184,0,4.15E-02,0,1.08E-02,3.77E-02,7.01E-02,16,22,0,0,0,0,0,0,0,0,0,0.42,Yes,Yes,1.158002193,0.354457106,-6.89E-02,0.904742306,-0.170557405,0.52844758,1.756731819,-0.529442237,0.698834426,0.527759844,1.922361048,-6.67E-02,-5.19E-02,0.909140434,5456.76615,6014.84868,11471.61483,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not reported,Medical record does not state.,Other: Medical record does not state.,Other,100% of anterior and 80% of posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,"Current reformed smoker, years unknown",NA,NA,Unknown,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,lisinopril (PRINIVIL|albuterol 
sulfate|Furosamide|Loratadine|Metformin|Megestrol,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,407|721,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,Yes|Yes,2|2,No|No,Complete Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,356.0|670.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02399,C3L-02399,No,b1,3,127N,CPT0171090008,Tumor,No,72,Female,Endometrioid carcinoma,G1 Well differentiated,6,160,73,28.52,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,4,Stage I,White,0,CNV_L,No,18.08,MSI-H,MSI-H,658,WT,Mutated,WT,Mutated,Mutated,1.84E-02,2.46E-03,1.90E-20,0,2.85E-19,0,2.55E-18,1.83E-03,5.01E-04,2.83E-19,0,1.70E-03,1.12E-02,0,1.52E-19,6.26E-19,0,0,0,2.68E-02,2.87E-18,0,5.79E-04,9.28E-03,8.01E-19,9.24E-18,7.76E-03,0,1.80E-17,1.44E-02,1.75E-02,1.52E-02,0,3.84E-02,4.15E-02,1.65E-02,8.16E-03,3.13E-19,8.16E-03,5.11E-02,0,0.156291699,2.02E-02,0,0.133542694,0,0,7.87E-02,0,0,2.96E-02,0,6.61E-03,3.27E-03,0.321720414,0,4.93E-02,0,8.25E-02,0,6.73E-02,8,108,240,0,0,0,0,95,0,0,40,0.22,Yes,Yes,0.356523193,-1.177795445,0.955305472,1.571301844,-0.812883679,-1.144946258,-0.419224288,9.95E-02,0.795561739,-1.034651936,-0.327372214,-0.146373798,1.111735452,-0.163999141,2177.415351,2032.120904,4209.536255,Negative for malignancy/normal/benign,Not identified,Cannot be assessed,Not reported,Medical record does not state.,Other: Medical record does not state.,Other,Tumor involves the entire endometrial cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,NA,NA,No,Alcohol consumption history not available,Smoking history not available,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Hydrochlorothiazide |Lisinopril|Metformin (GLUCOPHAGE),NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,388|716|1121,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Yes|Yes|Yes,Yes|Yes|Yes,None|None|None,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,374.0|702.0|1107.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-02403,C3L-02403,No,b3,12,127N,CPT0203150008,Tumor,No,78,Female,Endometrioid carcinoma,G2 Moderately differentiated,2.3,152,73,31.64,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,5,Stage I,White,2.168436612,CNV_L,No,0.21,MSS,CNV_L,62,WT,Mutated,WT,WT,WT,0,1.32E-18,0,0,2.08E-18,7.23E-18,1.45E-17,5.52E-03,0,1.19E-18,6.61E-18,8.06E-04,2.42E-02,4.13E-18,0,0,0,4.37E-18,0,1.73E-02,9.67E-20,0,4.39E-03,3.72E-03,0,9.28E-19,5.83E-04,0,2.60E-17,1.04E-21,1.92E-02,8.53E-03,0,7.64E-02,0.116689455,9.92E-18,2.48E-03,2.42E-18,2.48E-03,1.09E-02,0,6.97E-02,4.28E-02,0,0.221969431,0,4.16E-02,0,0,0,8.05E-02,5.12E-02,0.186537618,4.27E-03,1.60E-02,0,2.82E-02,0,0.178831222,3.42E-02,3.33E-02,11,23,0,0,0,10,13,0,0,0,0,0.88,NA,NA,0.524381162,-0.743967618,-0.604988673,0.27303969,-2.066452686,-1.428921729,-1.047606387,0.949078836,0.584137212,-1.25762043,-0.950153209,-1.674099253,1.701743818,-1.27778508,1971.012149,1687.928722,3658.940871,Positive for malignancy,Not identified,Cannot be assessed,Not reported,Medical record does not state.,Other: Medical record does not state.,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,NA,NA,No,Lifelong non-drinker,"Current reformed smoker, years unknown",NA,NA,Unknown,NA,Exposure to secondhand smoke history not available,Medical Record,Yes,Medical Record|Medical Record,carbamazepine|hydrochlorthiazide,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|Yes,Living|Living,375|375,Yes|Yes,No|No,No|No,With Tumor|With Tumor,Unknown|Unknown,Unknown|Unknown,Unknown|Unknown,No|No,No|No,Unknown|Unknown,No|No,Persistent 
Disease|Persistent Disease,Persistent Disease|Persistent Disease,n/a|n/a,Yes|Yes,Distant Metastasis|Distant Metastasis,Liver|Liver,325.0|325.0,No|No,No|No,No|No,No|No,344.0|344.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02408,C3L-02408,No,b3,9,131C,CPT0111720003,Tumor,No,60,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,175,88,28.54,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,2,Stage I,White,3.182216519,CNV_L,No,0.02,MSS,CNV_L,43,WT,Mutated,WT,Mutated,WT,0.115998006,1.11E-02,0,3.38E-18,5.88E-18,5.47E-04,5.09E-02,2.13E-19,2.74E-18,0,0,2.50E-02,1.28E-18,0,7.00E-03,3.73E-02,4.39E-03,1.23E-02,1.07E-18,6.80E-03,5.59E-02,4.33E-02,2.60E-02,1.64E-03,0,1.37E-02,5.61E-19,3.37E-03,4.29E-18,1.55E-02,2.07E-02,3.15E-03,1.15E-18,5.09E-02,1.55E-17,6.55E-20,6.48E-02,2.51E-02,8.99E-02,0,0,2.23E-02,3.30E-02,0,6.01E-02,0,3.23E-02,3.57E-02,0,1.98E-02,0,0,0.491546368,5.58E-03,0.147737178,0,4.94E-04,0,0.116885107,0,3.46E-02,16,26,0,0,0,0,0,0,0,0,0,0.24,NA,NA,0.220732003,0.824924776,0.478561692,1.129040782,-0.51458853,-5.62E-02,1.664753554,1.113795799,-0.516980841,0.109624338,1.936502917,0.943134015,0.147189091,1.046165377,5123.511869,5729.673666,10853.18553,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Uterine Mass,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Smoking history not available,NA,NA,NA,NA,Exposure to secondhand smoke history not available,NA,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Multivitamin|Probiotic|Aleve,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|Yes,Living|Living,442|442,No|Unknown,No|Unknown,No|Unknown,Tumor 
Free|Unknown Tumor Status,Never|Unknown,Former Therapy|Former Therapy,Never|Unknown,No|No,No|No,2|2,No|No,Complete Remission|Unknown,Complete Remission|Unknown,n/a|n/a,No|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,424.0|424.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02409,C3L-02409,No,b3,9,128C,CPT0111810003,Tumor,No,60,Female,Endometrioid carcinoma,G1 Well differentiated,2,160,98,38.16,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,5,Stage I,White,8.62E-04,CNV_L,No,4.9,MSS,CNV_L,213,WT,WT,Mutated,Mutated,Mutated,0.127920425,1.90E-02,2.77E-20,0,1.96E-18,0,3.77E-02,2.43E-19,8.74E-19,1.18E-04,2.90E-18,3.50E-03,0,0,6.78E-03,0,0,0,8.79E-18,0,4.10E-03,1.61E-02,1.10E-03,4.66E-04,5.72E-19,4.44E-18,6.24E-03,8.34E-19,1.75E-19,2.03E-02,3.67E-02,1.07E-02,3.92E-19,6.64E-02,1.51E-02,2.65E-02,2.02E-02,0,2.02E-02,1.77E-02,0,0.100500717,0.108031029,0,0.290292754,0,6.15E-02,0,1.58E-02,2.14E-02,1.09E-02,0,2.18E-02,3.83E-02,0.24271605,4.37E-02,2.15E-02,0,1.96E-03,3.86E-03,0,42,21,0,0,0,0,45,0,0,38,18,0.69,NA,NA,1.765120506,0.564339941,-2.352111854,0.254354244,0.560087239,0.560796752,0.372782855,0.63330904,0.983472617,-0.158391359,0.502633446,-0.28393349,0.774743392,1.097352889,3326.749174,4805.566666,8132.315839,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Uterine mass,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,Unknown,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Former Therapy,2,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical 
Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Adderall|Dexilant|Tribenzor|Albuterol inhaler|Megestrol Acetate|Potassium Chloride|Melatonin|Multivitamin|Vitamin D3,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|Yes,Living|Living,427|427,No|No,No|No,No|No,Tumor Free|Tumor Free,Former Therapy|Former Therapy,Former Therapy|Former Therapy,Never|Never,Yes|Yes,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,404.0|404.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02411,C3L-02411,No,b3,10,129C,CPT0111880003,Tumor,No,81,Female,Endometrioid carcinoma,G2 Moderately differentiated,6,155,76,31.63,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,2,Stage I,White,1.741142725,CNV_L,No,30.57,MSI-H,MSI-H,1387,Mutated,Mutated,WT,Mutated,Mutated,7.38E-02,1.21E-19,0,9.55E-18,1.84E-18,5.97E-18,5.85E-18,0,6.16E-18,1.31E-17,0,0,9.82E-03,0,2.43E-19,3.77E-20,0,0,3.70E-18,5.08E-19,7.20E-03,9.83E-03,0,0,0,0,7.70E-04,3.01E-20,3.76E-18,6.26E-02,3.54E-03,1.17E-02,0,0.103303567,9.27E-03,0,4.80E-03,4.59E-03,9.39E-03,0,8.64E-03,1.43E-02,0,0,0.102083916,0,3.92E-02,1.50E-02,0,1.03E-02,0,0,0.346403092,3.29E-03,8.49E-02,0,2.48E-02,0,0.297429038,1.41E-02,3.96E-02,0,93,340,0,0,0,201,231,0,0,143,0.72,NA,NA,-0.653334298,1.426061946,0.606151999,2.487905238,-0.677717126,1.085715164,1.932674343,6.49E-02,-0.154000378,-0.773704446,2.079873501,-1.686194991,1.788934529,0.352786302,2763.797411,3784.427922,6548.225333,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Caucasian,United States,Other,Uterine Mass,Unifocal,0,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,"Current reformed smoker, more than 15 years",25,28,10,1.5,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical 
Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Aleve|Altrace|AmLODIPine Besylate|Calcium|Famotidine|Multivitamin|Ocuvite|Aspirin|Simvastatin,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|Yes,Living|Living,385|784,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,367.0|766.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02412,C3L-02412,No,b1,1,131C,CPT0111950003,Tumor,No,74,Female,Endometrioid carcinoma,G1 Well differentiated,5,152,83,35.79,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,2,Stage I,White,5.18E-02,CNV_L,No,0.15,MSS,CNV_L,78,WT,Mutated,Mutated,Mutated,WT,8.56E-02,5.91E-03,1.67E-20,0,1.12E-17,1.30E-19,1.30E-02,3.82E-18,1.43E-17,3.94E-20,2.98E-18,1.86E-02,4.17E-03,0,1.41E-03,0,2.51E-19,0,4.03E-18,5.05E-03,8.83E-03,9.60E-03,1.47E-02,5.35E-03,1.17E-18,0,3.84E-04,0,2.23E-17,6.85E-03,6.49E-03,5.27E-03,0,9.88E-02,2.14E-02,1.38E-18,1.43E-02,0,1.43E-02,7.27E-03,0,9.77E-03,4.65E-02,0,0.184902803,0,0,5.69E-02,0,8.53E-02,0,4.09E-02,0,1.18E-02,0.422752469,2.58E-03,1.21E-02,6.33E-02,0,5.61E-02,0,28,30,0,0,0,11,0,0,0,0,0,0.55,NA,NA,0.925530522,-0.646528744,-0.176359011,-0.721992448,-8.11E-02,-1.302851061,-0.353073928,8.29E-02,-1.037386416,-1.766782648,-0.351390319,-0.29183766,0.855230241,-0.616219991,2140.034868,3257.757281,5397.792149,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Uterine Mass,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,Unknown,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Former Therapy,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and 
Medical Record,Levothyroxine|Lovastatin|Oxybutynin|Aspirin|Calcium + Vitamin D|Multivitamin|Fish Oil|PreserVision,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,354|656|1110,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Former Therapy|Former Therapy|Former Therapy,Former Therapy|Former Therapy|Former Therapy,Unknown|Unknown|Unknown,No|No|No,No|No|No,3|3|3,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,329.0|631.0|1085.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-02449,C3L-02449,No,b3,10,128N,CPT0186720008,Tumor,No,58,Female,Clear cell carcinoma,G3 Poorly differentiated,3.5,163,62,23.34,Present,<50% myometrial invasion,Eighth Edition (2017),pT1a,0,4,IA,Black or African American,NA,NA,Yes,0.04,MSS,POLE,4971,WT,WT,WT,Mutated,Mutated,0.280735109,0.115934873,5.15E-02,2.57E-18,0,1.75E-18,8.87E-02,1.11E-02,0.11887045,0.156832999,0.103774282,3.03E-02,5.12E-02,1.16E-18,7.26E-03,3.12E-03,1.33E-18,0,1.77E-18,1.08E-02,0.121097158,9.21E-02,3.79E-02,5.86E-03,1.65E-03,1.02E-02,2.41E-02,0,1.99E-02,1.98E-02,0.177433185,1.96E-02,6.61E-03,0.159505408,0.253316382,9.39E-03,0.266106727,1.56E-03,0.267667084,2.39E-03,8.38E-03,9.61E-03,0.218345012,0,2.69E-02,4.38E-02,7.79E-02,0,1.08E-02,0,0.192756947,1.64E-02,3.33E-03,0.16084529,0.173853302,3.27E-03,0,4.70E-02,0,0,4.60E-03,189,215,0,0,2288,2259,0,0,0,0,0,NA,NA,NA,-2.504332116,1.04349973,-3.389075924,-0.733873847,2.907446761,2.018156641,1.137366865,-0.153284942,0.560531879,-1.060715759,0.572501364,6.18E-02,1.977527161,-1.708321409,4232.613174,7850.345549,12082.95872,Not Examined,Not identified,Cannot be assessed,Not-Hispanic or Latino,African American,United States,Other,Posterior Endomyometrium,Unifocal,0,0,3,0,0,0,0,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Absent,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record,Yes,Medical Record,Lisinopril,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,461|787|1018|1452,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,461.0|787.0|1018.0|1452.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02468,C3L-02468,No,b2,5,130C,CPT0126000003,Tumor,No,63,Female,Endometrioid carcinoma,G1 Well differentiated,6,157,86,34.82,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,11,Stage I,White,0.110322572,CNV_L,No,0.16,MSS,CNV_L,168,WT,Mutated,WT,WT,WT,7.20E-02,0,0,0,4.46E-18,0,0,6.74E-05,1.32E-02,2.66E-02,0,1.08E-17,2.01E-03,0,4.26E-03,6.64E-02,0,0.303144277,0,0.186393915,8.02E-03,1.40E-02,1.01E-02,0,0,5.01E-18,0,3.44E-19,0,0,1.67E-02,0,0,4.75E-02,0,8.14E-03,1.70E-02,0.191923089,0.208916654,8.43E-02,0,7.23E-02,0.101415408,0,0.265756628,0,2.48E-02,0,0,0,7.62E-02,0,2.49E-02,1.02E-02,0.154901341,7.57E-03,1.81E-02,0.159486207,0,0,0,6,33,53,0,0,0,45,0,0,0,0,0.34,Yes,No,2.530011759,-1.573755029,-0.427557694,-1.155008403,0.163870676,-1.891298456,-0.548637648,-0.659051556,-0.620247876,0.966082411,-0.420254571,0.240475841,-4.85E-02,-0.45084239,5934.88564,4821.034666,10755.92031,Not Examined,Present,Margins uninvolved by invasive carcinoma,Hispanic or Latino,Caucasian,United States,Other,Anterior and Posterior Endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Smoking history not available,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,1,Unknown,Unknown,Unknown,Unknown Tumor Status,Unknown,Unknown,Unknown,No,Yes,Unknown,Unknown,Unknown,Unknown,n/a,Unknown,n/a,n/a,n/a,n/a,n/a,n/a,n/a,1,n/a,n/a,n/a,n/a,n/a
+C3L-02545,C3L-02545,No,b3,10,127C,CPT0113060003,Tumor,No,55,Female,Endometrioid carcinoma,G1 Well differentiated,20,175,134,43.76,Present,=50% myometrial invasion,Seventh Edition (2010),pT3a (FIGO IIIA),pN2 (FIGO IIIC2),9,Stage III,White,1.309762883,CNV_L,No,0.18,MSS,CNV_L,50,WT,WT,Mutated,Mutated,Mutated,0,2.42E-18,1.21E-18,4.27E-18,2.16E-18,0,1.11E-17,0,2.33E-18,1.26E-17,0,0,2.42E-02,0,0,2.21E-03,0,3.57E-17,0,8.88E-03,0,0,3.83E-04,1.46E-03,0,1.09E-17,2.00E-03,1.38E-18,3.59E-19,0,1.25E-18,6.04E-03,1.24E-17,3.17E-02,1.80E-18,0,9.74E-04,1.10E-03,2.08E-03,0.13244133,0,6.20E-02,0,0,0.276946099,0,7.19E-02,0,0,0,5.66E-02,5.73E-02,2.87E-02,0,0.106734243,0,7.98E-02,0,6.13E-02,5.94E-02,6.89E-03,14,34,0,0,0,0,0,0,0,0,0,0.62,Yes,No,-1.035116488,1.384020416,-1.590689016,1.422513221,-0.974504776,1.021570706,-1.223645756,0.505641544,-0.471251062,-1.733072855,-0.97792523,-1.808847395,1.594489641,1.278755187,1096.432541,1618.092058,2714.524599,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and Posterior,Unifocal,IHC staining not done,3,4,IHC staining not done,3,13,IHC staining not done,6,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Current smoker: Includes daily and non-daily smokers,25,NA,10,15,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,"Advair Diskus|Lasix |Lisinopril|Zyrtec|Actos|Tylenol extra strength|MetroCream topical cream|Nystatin cream |ProAir HFA|Lipitor|CeleBREX 200 mg| Motrin IB 200 mg|Prevacid 30mg|Zofran 4 mg|Protonix 40 mg |triamcinolone 0.1% Cream|Tylenol 8 Hour 650 mg oral tablet,|Glucophage 850 mg oral tablet",NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,356|686|1069|1432,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,356.0|686.0|1069.0|1432.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02555,C3L-02555,No,b1,2,127N,CPT0128370003,Tumor,No,71,Female,Endometrioid carcinoma,G1 Well differentiated,4,157,137,55.3,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,White,0.321691537,CNV_L,No,0,MSS,CNV_L,29,WT,Mutated,WT,Mutated,WT,9.10E-02,0,2.35E-19,0,0,0,5.19E-03,0,1.55E-19,1.02E-02,7.07E-19,0,7.04E-03,2.15E-03,1.09E-02,2.79E-02,2.20E-03,0.221278411,0,0.10293007,8.28E-03,1.23E-02,2.62E-03,6.99E-03,0,4.16E-03,0,2.87E-03,0,4.10E-03,1.28E-02,0,0,3.13E-02,2.97E-02,0,2.36E-02,0.138481455,0.162087047,0,1.31E-02,4.17E-02,6.22E-03,0,0.271539543,0,1.98E-02,3.24E-03,0,0,7.06E-02,0,6.78E-02,3.06E-02,0.214448819,2.77E-02,4.10E-02,0.109887591,0,0,8.23E-02,6,8,0,0,0,0,0,0,0,11,0,0.28,NA,NA,0.730573088,0.2194551,0.904615117,-0.773859855,0.600982588,-0.336610101,0.877717236,0.776917366,-0.742086405,0.909248452,0.986073372,0.732167066,-0.910661952,0.548064114,5856.975157,4881.545196,10738.52035,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical 
Record,Aspirin|Cozaar|Toprol-XL,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Deceased,323|685|1059|1421,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,No|No|No|No,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,323.0|685.0|1059.0|n/a,n/a|n/a|n/a|Other : Acute Respiratory Failure,n/a|n/a|n/a|1421.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02557,C3L-02557,No,b3,12,130N,CPT0128610003,Tumor,No,60,Female,Endometrioid carcinoma,G3 Poorly differentiated,3.5,173,62,20.69,Not identified,NA,Seventh Edition (2010),pT1a (FIGO IA),pN0,0,Stage I,White,22.0323639,CNV_H,Yes,0.28,MSS,POLE,10579,Mutated,Mutated,Mutated,Mutated,Mutated,0.220211953,5.10E-02,1.69E-02,5.55E-19,6.69E-04,1.24E-18,1.21E-02,1.31E-02,8.83E-03,3.60E-02,8.54E-03,1.50E-02,1.25E-02,3.11E-18,1.05E-02,8.92E-20,3.37E-18,5.82E-18,7.98E-19,0,7.25E-02,5.25E-02,2.65E-02,2.73E-03,0,0,1.17E-03,0,1.21E-18,2.28E-02,9.78E-02,3.28E-02,0,0.111273874,0.382940999,1.22E-02,9.75E-02,2.95E-18,9.75E-02,1.76E-02,0,7.00E-02,0.15254295,0,6.31E-02,5.22E-02,9.29E-02,0,5.22E-03,0,0.104300943,0,4.25E-02,0.116217161,0.28169176,0,0,0,0,0,1.74E-03,499,638,0,0,5226,4189,0,0,0,0,0,0.26,Yes,Yes,-1.423493372,-0.150835025,-0.981849085,1.746109297,1.274855061,0.46070137,-1.254264295,-1.456454739,0.255014024,-1.441478311,-1.543809731,-1.0709663,0.808796046,-3.59E-02,2769.3066,5475.036872,8244.343472,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Both anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,8,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,Yes,Cannot be determined,Yes,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,p16 & p53: Neoplastic cells demonstrate patchy positivity,Pancytokeratin stain: no tumor cells are identified,No,Alcohol consumption history not available,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Aspirin|Ferrous sulfate|Metformin|Novolin N Relion,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,335|654|1042|1375,No|No|No|No,No|No|No|No,No|No|No|No,Unknown Tumor Status|Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,3|3|3|3,No|No|No|No,Not Applicable|Not Applicable|Not Applicable|Not Applicable,Unknown|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,335.0|654.0|1042.0|1375.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02602,C3L-02602,No,b3,12,127C,CPT0113910003,Tumor,No,57,Female,Endometrioid carcinoma,G3 Poorly differentiated,4.2,173,101,33.75,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,4,Stage I,White,1.32E-03,CNV_L,No,26.59,MSI-H,MSI-H,832,WT,WT,WT,Mutated,Mutated,3.34E-19,5.10E-04,3.76E-19,0,6.26E-19,0,0,4.44E-19,3.62E-19,4.89E-20,3.03E-18,4.75E-03,5.87E-03,0,4.90E-20,1.19E-02,0,6.60E-18,0,2.55E-02,3.24E-04,3.85E-20,7.84E-03,5.28E-03,7.01E-18,0,0,2.38E-20,5.72E-19,3.21E-02,2.44E-03,1.07E-02,1.56E-17,9.92E-02,0.152285652,0,4.07E-03,5.96E-03,1.00E-02,0,0,7.02E-02,1.38E-02,0,0.110649004,0,9.53E-02,0,0,0,0.133048471,4.63E-03,4.78E-02,0,0.333095274,0,8.90E-02,9.45E-02,0,8.02E-03,0,0,111,351,0,0,0,0,76,0,0,67,0.57,NA,NA,-1.087374265,0.396265585,-0.37634493,-0.991307533,-0.927092155,-0.242996616,-1.262112281,0.66364367,-1.543802576,-1.215483159,-1.233692111,-1.935854835,0.824210479,-1.660052003,1652.649136,1906.465199,3559.114335,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Anterior and Posterior,Unifocal,IHC staining not done,4,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record,Yes,Medical Record,Vitamin D 2000,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,329|692|1010|1431,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,No|No|No|No,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,329.0|692.0|1010.0|1431.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3L-02741,C3L-02741,No,b1,2,128N,CPT0188190004,Tumor,No,59,Female,Endometrioid carcinoma,G1 Well differentiated,4.2,155,109,45.34,Present,<50% myometrial invasion,Eighth Edition (2017),pT1a [IA],pN0,3,pT1a [IA],White,0,CNV_L,No,0.01,MSS,CNV_L,78,WT,Mutated,Mutated,Mutated,Mutated,0.126942286,2.15E-02,1.71E-02,0,0,2.07E-18,3.35E-02,3.02E-03,2.14E-02,6.28E-02,0,6.40E-03,7.92E-03,1.60E-19,5.97E-03,0,7.52E-04,6.65E-18,6.97E-19,9.47E-03,3.51E-02,2.66E-02,1.44E-02,2.66E-03,0,1.50E-18,6.43E-03,9.63E-21,0,0,6.27E-02,7.23E-03,0,3.31E-02,0.100821553,1.10E-02,5.82E-02,3.32E-18,5.82E-02,2.68E-02,0,3.61E-02,5.27E-02,0,0.280410779,1.16E-02,5.20E-02,0,2.63E-02,0,4.09E-02,0,4.53E-02,8.59E-02,0.201740993,5.55E-02,0,0,0,3.42E-02,5.05E-02,17,32,21,0,0,0,0,0,0,0,0,0.46,Yes,Yes,0.394003455,-0.260573858,0.954144865,-0.1668009,0.595689226,0.273563673,0.532828325,0.731187029,0.373708248,-0.416185648,0.578089144,0.159976594,-0.218940957,1.024427815,4333.180519,5202.11138,9535.291899,Not Examined,Present,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Endometrium,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical 
Record,Lantus|Metformin|Allopurinol|Simvastatin|Metoprolol|Vitamin D,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,414|750,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Former Therapy|Former Therapy,Never|Never,Yes|Yes,Yes|Yes,1|1,No|No,Complete Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,371.0|707.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-02744,C3L-02744,No,b2,6,128N,CPT0188370003,Tumor,No,73,Female,Serous carcinoma,G3 Poorly differentiated,3.2,157,82,32.92,Present,<50% myometrial invasion,Eighth Edition (2017),pT1a [IA],pN0,2,pT1aN0,White,13.89818603,CNV_H,No,0.64,MSS,CNV_H,72,Mutated,Mutated,WT,WT,WT,0.138868154,3.44E-02,3.30E-03,0,3.52E-19,6.03E-03,0,5.85E-19,3.31E-03,1.25E-03,4.72E-18,0,6.03E-03,0,3.64E-03,8.74E-19,5.07E-20,2.35E-19,0,0,1.59E-02,2.95E-02,1.56E-03,9.53E-03,2.97E-03,0,1.09E-02,0,0,4.19E-02,3.62E-02,1.53E-02,8.20E-18,4.62E-02,9.80E-02,7.17E-03,4.45E-02,5.54E-19,4.45E-02,0,0,2.77E-02,0.116519696,0,0.135637399,0,0.127290393,0,0,0,5.12E-02,6.19E-03,8.33E-02,0.138718106,0.272059461,0,1.69E-02,0,2.19E-02,2.63E-03,0,17,52,0,0,0,0,0,0,0,0,0,0.81,NA,NA,-0.591102835,-0.607538601,-1.592816372,-0.269396188,1.790264105,3.63E-02,-0.305010908,-0.603350865,0.753250993,-1.048370597,-0.641485937,-0.90309894,-0.150361494,7.96E-02,2676.453008,3594.013095,6270.466102,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Caucasian,Other: Unknown,Other,Endometrium,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Positive : 10 %,Negative,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Overexpression,Yes,Cannot be determined,Yes,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,4 or more,p16 - Overexpression,Ki-67 - 80%,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Lisinopril|Hydrochlorothiazide|Lovastatin,NA,NA,NA,NA,Yes,2,12 Months,Yes,Living,455,No,Yes,No,Unknown Tumor Status,Never,Former 
Therapy,Never,Yes,No,4 or more,No,Unknown,Unknown,n/a,Unknown,n/a,n/a,n/a,n/a,n/a,n/a,n/a,407,n/a,n/a,n/a,n/a,n/a
+C3L-02746,C3L-02746,No,b2,6,130N,CPT0188270004,Tumor,No,66,Female,Endometrioid carcinoma,G2 Moderately differentiated,2.8,160,98,38.26,Present,=50% myometrial invasion,Eighth Edition (2017),pT1b,pN0,2,Stage IB,White,5.79E-03,CNV_L,No,0.15,MSS,CNV_L,66,WT,Mutated,WT,WT,WT,6.95E-02,1.26E-02,6.15E-20,0,0,2.10E-18,0,0,2.16E-19,7.09E-03,0,5.92E-03,7.51E-03,4.94E-19,3.67E-04,2.48E-19,0,1.60E-18,0,3.99E-02,8.80E-03,5.85E-03,1.19E-02,1.07E-03,0,0,7.08E-03,0,1.74E-18,5.57E-18,9.68E-03,8.33E-03,0,9.51E-03,9.31E-02,8.07E-03,1.52E-02,9.23E-19,1.52E-02,1.92E-02,0,0.124264868,7.12E-02,0,0.157472456,0,8.32E-02,0,0,0,3.95E-02,1.20E-02,2.82E-02,0,0.278203367,0,7.26E-02,0,4.20E-02,2.23E-02,4.98E-02,22,38,0,0,0,0,0,0,0,0,0,0.52,Yes,Yes,0.728367234,1.16830713,1.152749632,-0.407007597,0.657242715,0.899014175,5.75E-02,1.238954416,0.607780177,-0.486863626,9.67E-02,-0.535969293,-0.328143904,6.61E-02,3458.857808,3687.508458,7146.366266,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Endometrium,Unifocal,0,0,1,0,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self Report|Self Report|Self Report,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and 
Medical Record,Alpha Lipoic Acid|Aspirin |Calcium|Cinnamon|Cranberry|Januvia|Metformin|Vitamin B-12|Vitamin C|Vitamin E,NA,NA,NA,NA,Yes,2,12 Months,Yes,Living,313,Yes,No,No,Unknown Tumor Status,Never,Never,Never,Yes,No,2,No,Unknown,Unknown,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,288,n/a,n/a,n/a,n/a,n/a
+C3L-02747,C3L-02747,No,b2,6,129N,CPT0188480004,Tumor,No,66,Female,Endometrioid carcinoma,G2 Moderately differentiated,0,160,74,28.85,Present,<50% myometrial invasion,Eighth Edition (2017),pT1a [IA],pN0,2,Stage IA,White,0.16469716,CNV_L,No,0.02,MSS,CNV_L,79,Mutated,Mutated,Mutated,WT,WT,4.12E-02,1.05E-22,8.70E-04,4.52E-18,0,4.80E-18,2.41E-02,9.86E-03,5.64E-02,7.24E-02,0,0,1.75E-02,4.40E-18,4.51E-03,8.37E-03,1.84E-18,0.132000137,0,4.81E-02,0,1.00E-03,9.73E-04,2.88E-03,1.71E-18,0,2.58E-03,0,4.18E-19,0,3.99E-02,7.99E-03,1.47E-18,0,0.122991193,1.71E-03,4.25E-02,7.17E-02,0.114212713,6.42E-02,0,9.01E-02,5.63E-02,0,0.241969113,0,8.69E-02,0,9.07E-02,0,2.89E-02,0,5.58E-02,3.88E-02,0.15366573,2.64E-04,5.30E-02,1.25E-02,0,1.59E-02,1.09E-02,16,37,0,0,0,0,18,0,0,0,0,0.23,NA,NA,0.556301124,-3.46E-02,2.356294556,-0.900618903,0.195743885,7.99E-02,-7.87E-02,0.506302618,-0.775327931,0.533758841,-0.189949983,-0.638969563,-1.566650249,0.740481676,4853.985643,4050.826376,8904.812019,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Endometrium,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Current Therapy,None,NA,NA,Yes,Lifelong non-drinker,"Current reformed smoker, more than 15 years",19,28,5,2.3,Exposure to secondhand smoke history not available,Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Progesterone |Thyroid TABS|Vitamin D,Basal Cell Cancer,Self-reported and Medical Record,Other(Mohs treatment),Yes,Yes,2,12 Months,Yes,Living,355,No,No,No,Tumor Free,Former 
Therapy,Never,Never,No,No,None,No,Complete Remission,Complete Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,321,n/a,n/a,n/a,n/a,n/a
+C3L-02800,C3L-02800,No,b3,11,129C,CPT0186900008,Tumor,No,62,Female,Endometrioid carcinoma,G2 Moderately differentiated,8,160,44,17.36,Present,=50% myometrial invasion,Eighth Edition (2017),1b,1,7,IIIC1,Not Reported,0,CNV_L,No,0.04,MSS,CNV_L,77,WT,Mutated,WT,Mutated,WT,0.13088719,5.54E-02,2.39E-20,0,3.10E-18,5.36E-18,2.03E-02,5.60E-19,5.80E-19,1.30E-02,1.86E-18,3.28E-03,0,4.58E-19,4.00E-03,0,2.91E-03,0,0,2.51E-02,9.02E-02,5.21E-02,4.67E-02,8.00E-03,4.24E-03,3.04E-02,1.26E-02,4.66E-03,1.83E-18,0,3.64E-02,1.13E-02,0,2.48E-02,0,6.41E-03,0.130438247,2.67E-19,0.130438247,0,4.00E-02,6.39E-02,3.20E-02,0,0.191386648,0,2.59E-05,0,0,2.46E-03,7.08E-02,2.80E-02,0,0,0.393414602,0,4.46E-02,9.10E-03,0,4.87E-03,0.119450398,34,35,0,0,0,0,0,0,0,0,0,0.34,NA,NA,0.916979191,0.328143467,-0.133290382,1.07839776,0.800271123,-0.184025007,1.53154745,2.035646258,0.126621444,-0.617025882,1.690917707,1.075984327,0.427187422,0.207440286,4542.005739,5923.57068,10465.57642,Not Examined,Not identified,Cannot be assessed,Not reported,Patient did not disclose,Other: Patient did not disclose,Other,"anterior and posterior endomyometrium, lower uterine segment",Unifocal,3,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Absent,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,4 or more,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Coumadin|Toprol XL|Lasix,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,333|782|1099,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor 
Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Never|Never|Never,Yes|Yes|Yes,No|No|No,4 or more|4 or more|4 or more,No|No|No,Unknown|Unknown|Unknown,Complete Remission|n/a|n/a,n/a|Complete Remission|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,333.0|782.0|1099.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-02802,C3L-02802,No,b1,3,130N,CPT0174890008,Tumor,No,56,Female,Endometrioid carcinoma,G1 Well differentiated,5.6,170,118,40.75,Present,<50% myometrial invasion,Eighth Edition (2017),pT2 (FIGO II),pN0,5,Stage II,White,NA,NA,No,0.01,MSS,NA,62,WT,Mutated,WT,WT,WT,7.50E-02,3.73E-02,0,0,1.84E-18,7.48E-19,0,3.96E-19,0,1.54E-04,1.16E-18,3.11E-02,2.01E-03,0,6.91E-03,4.59E-03,0,3.39E-02,0,7.58E-02,5.46E-03,7.47E-03,1.24E-02,6.88E-03,0,5.58E-18,1.16E-02,9.66E-19,8.41E-19,3.07E-18,2.87E-02,4.08E-03,0,1.94E-02,5.12E-02,2.00E-02,3.77E-02,1.95E-02,5.72E-02,0,0,0.101281922,3.00E-02,0,0.279683089,0,2.80E-02,0,0,3.21E-02,3.27E-02,1.36E-02,0,1.16E-02,0.350116517,4.78E-02,0,7.33E-02,0,0,0,17,37,0,0,0,0,0,0,0,0,0,NA,Yes,Yes,0.775085641,-1.558194899,-0.390610225,-1.030024653,-0.513403734,-1.015309655,-0.415125526,0.732860877,-0.243031102,-0.346188022,-0.464713404,0.311253083,-0.350183655,-0.646846385,4250.972659,4203.374447,8454.347106,Atypical and/or suspicious,Not identified,Cannot be assessed,Not-Hispanic or Latino,Caucasian,Other: TSS did not collect this information.,Other,Anterior and posterior endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,3,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,1,NA,NA,Unknown,"Consumed alcohol in the past, but currently a non-drinker",Current reformed smoker within past 15 years,Unknown,Unknown,Unknown,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Lisinopril|Metformin|Paroxetine,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 
Months,No|No|Yes,Living|Living|Living,404|690|844,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown Tumor Status|Unknown Tumor Status|Unknown Tumor Status,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Yes|Yes|Yes,Yes|Yes|Yes,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|n/a,n/a|n/a|Unknown,Unknown|Unknown|Unknown,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,404.0|690.0|844.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-02894,C3L-02894,No,b2,8,129C,CPT0196790004,Tumor,No,67,Female,Serous carcinoma,G3 Poorly differentiated,9,170,124,42.75,Present,<50% myometrial invasion,Eighth Edition (2017),pT3b,pN1,1,IVB,White,32.26682434,CNV_H,No,0.47,MSS,CNV_H,31,Mutated,WT,WT,WT,Mutated,0.116052251,3.07E-03,0,0,0,4.91E-18,6.54E-19,0,3.55E-03,2.64E-02,9.10E-20,2.03E-03,1.44E-02,0,9.05E-03,4.18E-03,6.22E-03,3.08E-18,4.12E-18,4.47E-02,1.96E-02,1.19E-02,6.14E-03,1.44E-02,7.20E-19,2.91E-04,1.05E-03,0,8.45E-18,6.31E-02,1.75E-02,1.37E-02,4.12E-03,2.67E-02,7.72E-02,3.53E-02,3.75E-02,2.09E-03,3.96E-02,1.87E-02,0,3.92E-02,4.22E-03,0,0.389947278,1.24E-02,3.43E-02,0,0,1.82E-02,3.76E-02,0,0,0.125831425,0.308898825,8.39E-03,2.21E-03,0,0,0,0,0,10,13,0,0,6,0,0,0,0,0,0.5,NA,NA,-0.302656356,-1.89215291,1.717822789,5.69E-02,1.634944881,-2.429994437,-0.345932677,-0.717609346,1.553613754,-0.920875207,-0.557953488,-0.147590392,0.335801798,-1.614018749,3864.565935,4180.945317,8045.511252,Positive for malignancy,Not identified,Margin(s) involved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Anterior endometrium,NA,Unifocal,IHC staining not done,1,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,R2: Macroscopic residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Overexpression,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,None,Ki-67 is increased,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Allopurinol|Neurontin|lisinopril |toprol 
XL|multivitamin,NA,NA,NA,NA,Yes,1,12 Months,No,Deceased,428,Yes,Yes,No,With Tumor,Unknown,Unknown,Unknown,No,No,None,No,Persistent Disease,Patient Deceased,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,Other : Serous carcinoma of uterine,431,n/a,n/a,n/a
+C3L-02953,C3L-02953,No,b4,14,129N,CPT0161590008,Tumor,No,56,Female,Endometrioid carcinoma,G1 Well differentiated,3.5,160,83,32.42,Not identified,NA,Seventh Edition (2010),pT1a (FIGO IA),pN0,0,Stage I,White,3.002217572,CNV_L,No,0.04,MSS,CNV_L,50,WT,Mutated,WT,Mutated,WT,2.31E-02,7.82E-03,6.78E-03,6.67E-19,7.14E-18,1.22E-17,2.04E-18,3.33E-03,1.26E-19,8.50E-03,5.55E-18,9.18E-03,3.82E-02,4.19E-19,2.20E-03,9.63E-19,1.58E-19,5.59E-03,0,5.43E-02,2.76E-04,0,6.97E-03,3.88E-03,2.69E-18,0,6.96E-03,0,0,0,7.73E-03,1.12E-02,0,7.74E-02,0.141421964,0,9.45E-03,2.80E-03,1.22E-02,1.99E-02,0,0,0,0,0.403147937,0,0,0,4.34E-02,0,8.87E-02,0,0,4.23E-02,0.197120605,0.101290754,1.55E-02,6.24E-02,0,2.62E-02,0,18,30,0,0,0,0,0,0,0,0,0,0.25,NA,NA,-0.809465177,-1.892438555,0.368169932,-1.520651544,-0.325254897,-2.145732302,-0.732143709,-0.576884602,-0.264900823,-1.028471118,-0.744663868,-0.435615665,0.207689789,-0.978108343,2859.960218,3016.273958,5876.234176,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Hispanic or Latino,Hispanic,United States,Other,Anterior and Posterior,Unifocal,IHC staining not done,0,0,IHC staining not done,0,5,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,4 or more,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Flonase|lisinopril |imitrex,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,464|652|1063,No|No|No,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Never|Never|Never,Yes|Yes|Yes,No|No|No,4 or more|4 or more|4 or more,No|No|No,Not Applicable|Not Applicable|Not Applicable,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,464.0|652.0|1063.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-02990,C3L-02990,No,b2,6,131N,CPT0238260003,Tumor,No,67,Female,Serous carcinoma,G3 Poorly differentiated,5.3,155,53,22.2,Present,=50% myometrial invasion,Eighth Edition (2017),pT1b,pN1,3,IB,White,15.39006414,CNV_H,No,1.17,MSS,CNV_H,72,Mutated,WT,WT,WT,WT,7.91E-02,2.84E-02,3.56E-03,2.10E-18,1.63E-19,7.55E-19,5.20E-18,1.10E-02,2.39E-19,3.62E-03,2.84E-18,8.89E-19,1.94E-02,6.46E-19,0,1.90E-03,2.14E-19,1.28E-02,1.59E-17,1.64E-02,1.64E-02,2.42E-02,2.07E-03,1.02E-03,2.11E-03,0,5.55E-03,5.78E-19,0,1.12E-02,3.43E-02,6.71E-03,1.18E-02,7.66E-02,0.179428212,0,3.05E-02,9.26E-03,3.98E-02,2.33E-03,0,2.33E-04,5.97E-02,0,0.249125649,1.67E-02,2.25E-02,0,2.69E-02,0,6.90E-02,0,0,0.108754988,0.29473712,0,8.08E-02,0,4.80E-02,0,2.11E-02,23,47,0,0,0,0,0,0,0,0,0,0.79,NA,NA,-1.136808914,-0.18967163,-2.065544496,-1.377735401,0.626819451,0.784653339,-0.705770149,-1.190684684,0.328633076,-0.327609874,-0.981983691,-1.717230186,-0.673944479,-0.457005729,3038.314743,3320.857845,6359.172588,Not Examined,Not identified,Cannot be assessed,Not-Hispanic or Latino,Caucasian,United States,Other,Endomyometrium - posterior and anterior,Multifocal,1,1,0,0,0,0,0,0,IB,R0: No residual tumor,Positive : % Not available,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,"CD10=possible stroma, Ki-67 proliferative, Her2neu negative, P16 positive, Wilms Tumor positive, Beta-Catenin Positive, PTEN positive",NA,No,Alcohol consumption history not available,"Current reformed smoker, more than 15 years",23,63,Unknown,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Medical Record|Medical Record|Medical 
Record,Naproxen|Valtrex|Evista|Melatonin|Magnesium Oxide|Calcium Citrate D3,NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,274,Yes,Yes,No,Unknown Tumor Status,Former Therapy,Unknown,Never,No,No,None,No,Not Applicable,Not Applicable,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,251,n/a,n/a,n/a,n/a,n/a
+C3L-03143,C3L-03143,No,b4,15,127N,CPT0234080003,Tumor,No,67,Female,Serous carcinoma,Other: Not specified.,6,165,76,27.79,Present,<50% myometrial invasion,Eighth Edition (2017),pT3b,Not assessed,Unknown value,IVB,Asian,48.06698584,CNV_H,No,0.74,MSS,CNV_H,80,Mutated,WT,WT,WT,WT,0.164961528,4.83E-02,6.25E-03,1.15E-18,0,7.38E-18,0,3.71E-03,0,6.37E-03,0,1.19E-02,3.88E-02,0,4.46E-03,2.56E-03,4.75E-19,3.44E-18,2.30E-18,2.07E-02,8.89E-02,6.63E-02,6.30E-02,1.19E-02,0,5.68E-03,4.51E-03,0,3.32E-18,1.21E-02,3.07E-02,2.06E-02,1.97E-02,0.156393402,0.121081541,0,0.106185887,1.28E-03,0.107466999,5.61E-03,0,5.93E-02,1.44E-02,0,0.156323105,0,2.81E-02,8.87E-03,0,0,6.09E-02,4.56E-03,0.1251209,3.23E-02,0.484311913,0,1.50E-03,1.13E-02,0,0,7.51E-03,18,58,0,0,0,0,0,0,0,0,0,0.24,Yes,No,-1.795173579,-0.242445775,-1.823740404,-0.325799406,0.280835724,-0.917153033,-0.338774305,-2.406594955,0.661974435,-1.186250662,-0.492380375,-0.5718681,1.742996723,-1.638259025,3480.166403,5296.123459,8776.289862,Not Examined,Not identified,Margin(s) involved by invasive carcinoma,Not-Hispanic or Latino,Indian,Other: India,Anterior endometrium,NA,Multifocal,IHC staining not done,0,Unknown value,IHC staining not done,0,Unknown value,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Unknown,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,NA,NA,Yes,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record,No,NA,NA,Breast,Medical Record,"Radiation,Surgery",Yes,Yes,2,12 Months|24 Months,No|No,Living|Living,389|767,Yes|Yes,Yes|Yes,No|No,With Tumor|With Tumor,Unknown|Unknown,Unknown|Unknown,Former Therapy|Former 
Therapy,Yes|Yes,Yes|Yes,2|2,No|No,Unknown|Unknown,Persistent Disease|Persistent Disease,n/a|n/a,No|Yes,n/a|Distant Metastasis,n/a|Other: retroperitoneal lymph nodes and left axilary lymph node,n/a|585.0,n/a|No,n/a|Yes,n/a|No,n/a|No,389.0|767.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-03264,C3L-03264,No,b4,13,131C,CPT0225010003,Tumor,No,73,Female,Endometrioid carcinoma,G2 Moderately differentiated,2.5,160,91,35.43,Present,=50% myometrial invasion,Eighth Edition (2017),pT1b (FIGO IB),pN0,17,Stage IB,White,3.684774312,CNV_L,No,13.39,MSI-H,MSI-H,773,Mutated,Mutated,WT,Mutated,Mutated,0.26994991,8.37E-02,1.74E-02,7.77E-18,0,0,3.05E-02,2.20E-03,3.46E-02,7.49E-02,2.19E-02,1.68E-03,2.71E-02,0,7.36E-03,0,0,5.08E-19,0,1.02E-19,0.14831925,9.96E-02,7.29E-02,5.61E-03,3.21E-05,5.49E-03,7.90E-03,7.70E-03,4.61E-18,9.22E-03,0.121754587,2.86E-02,3.94E-02,0.134016256,0.272077579,0,0.195172954,4.93E-19,0.195172954,0,1.76E-03,4.31E-02,0.182054303,0,1.80E-02,7.74E-02,5.12E-02,1.48E-02,0,0,8.77E-02,0,5.52E-02,0.141830531,0.197392313,0,0,4.59E-02,0,0,8.37E-02,69,16,0,0,0,0,143,124,0,169,64,0.54,NA,NA,-2.200524627,1.135158314,-1.546455211,0.494836154,2.795392137,1.031519067,2.063841158,-2.164076062,2.171478151,-1.515637629,1.695372992,0.372870976,1.037453216,-1.942804878,3734.259429,7173.677855,10907.93728,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Caucasian,United States,Posterior endometrium,NA,Unifocal,IHC staining not done,0,12,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,NA,NA,Yes,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Atenolol|Diltiazem HCL|Hydrochlorothiazide|Meloxicam|Metformin|Toviaz|Voltaren (topical)|Vytorin|Xarelto|Xiidra|Docusate Sodium|Polyethylene Glycol|Potassium Chloride|Senna,Melanoma,Medical Record,Unknown,Unknown,Yes,2,12 Months|24 Months,No|No,Living|Living,507|847,Yes|Yes,No|No,No|No,Unknown Tumor Status|Unknown Tumor Status,Unknown|Unknown,Unknown|Unknown,Unknown|Unknown,Yes|Yes,No|No,2|2,No|No,Unknown|Unknown,Unknown|Unknown,n/a|n/a,Unknown|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,507.0|847.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-03467,C3L-03467,No,b1,4,128C,CPT0188610003,Tumor,No,73,Female,Serous carcinoma,G3 Poorly differentiated,3.2,160,83,32.59,Present,=50% myometrial invasion,Eighth Edition (2017),pT2 [II],pN1 [IIIC1,5,Stage IIIC1: T1-3/N1/M0,White,44.08377525,CNV_H,No,0.22,MSS,CNV_H,57,Mutated,WT,WT,WT,WT,3.32E-02,4.02E-02,1.28E-20,1.54E-18,5.93E-18,6.21E-18,6.35E-19,1.15E-18,1.45E-19,0,1.29E-17,0,1.36E-02,2.28E-03,4.62E-20,0,0,0,1.90E-02,1.65E-02,1.23E-03,2.17E-03,9.99E-03,1.79E-02,6.40E-03,2.14E-18,1.50E-02,2.53E-21,8.70E-18,1.80E-19,0,6.56E-03,0,1.28E-02,9.18E-02,6.49E-18,3.96E-02,1.77E-03,4.13E-02,0.128349982,0,1.40E-02,0.106646197,0,0.124215886,0,7.69E-02,0,0,0,2.63E-02,7.18E-03,5.29E-02,2.24E-02,0.20326056,1.23E-03,1.95E-02,7.26E-02,0.113727205,2.60E-02,4.89E-03,21,32,0,0,0,0,0,0,0,0,0,0.24,NA,NA,-0.73986979,-6.77E-02,-3.18E-02,-0.945841428,1.09690119,0.712829537,0.680169252,-0.979397066,-1.112269404,0.336409659,0.638153991,-0.159035122,-0.935291549,0.285347859,3473.636966,3185.651842,6659.288808,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Endometrium,Unifocal,1,1,7,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Former Therapy,2,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical 
Record|Self-reported and Medical Record|Self-reported and Medical Record,Famotidine|Levothyroxine Sodium|Lexapro|Vitamin B12|Vitamin D,NA,NA,NA,NA,Yes,2,12 Months,Yes,Living,410,No,Yes,No,Tumor Free,Former Therapy,Former Therapy,Never,No,No,2,No,Complete Remission,Complete Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,383,n/a,n/a,n/a,n/a,n/a
+C3L-03589,C3L-03589,No,b3,9,130C,CPT0188710003,Tumor,No,59,Female,Endometrioid carcinoma,G2 Moderately differentiated,0.7,173,130,43.48,Not identified,NA,Eighth Edition (2017),pT1a [IA],pN0,4,Stage IA,White,0,CNV_L,No,24.9,MSI-H,MSI-H,762,WT,Mutated,WT,Mutated,WT,5.45E-02,1.10E-02,0,0,0,1.29E-17,8.15E-19,0,1.23E-18,2.02E-18,8.40E-18,3.63E-20,3.09E-02,8.92E-18,3.51E-03,5.66E-21,2.46E-18,0,2.58E-18,9.83E-03,8.67E-03,6.39E-03,3.75E-03,1.81E-03,0,5.81E-19,5.59E-03,7.06E-19,4.29E-17,1.54E-02,3.29E-02,1.51E-02,3.91E-18,5.61E-02,0.146281241,0,1.66E-02,1.26E-20,1.66E-02,4.93E-02,0,5.94E-02,4.50E-02,0,0.146977081,2.45E-02,4.97E-02,0,0,0,3.40E-02,0,0.125292068,6.68E-02,0.138192484,5.25E-03,0,0,0.166581636,1.71E-02,7.19E-02,0,115,238,0,0,0,0,127,0,0,103,0.5,NA,NA,-0.356254436,0.949837004,0.732015304,0.344629367,-0.336280134,1.364646756,1.918183314,-0.307795601,0.35610709,-0.921718338,1.796789357,-1.247110478,2.065531641,5.10E-02,2482.930503,4395.756138,6878.686641,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,White,United States,Other,Endometrium,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,No,Present,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Current reformed smoker within past 15 years,14,58,Unknown,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Esomeprazole Magnesium|Levothyroxine |Simvastatin,NA,NA,NA,NA,Yes,2,12 
Months|24 Months,No|Yes,Living|Living,427|883,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Unknown|Unknown,Yes|Yes,No|No,None|None,No|No,Complete Remission|Complete Remission,Unknown|Unknown,n/a|n/a,Unknown|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,417.0|873.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-05571,C3L-05571,No,b4,16,131N,CPT0275270003.1,Tumor,No,58,Female,Serous carcinoma,G3 Poorly differentiated,8.2,165,86,31.48,Present,=50% myometrial invasion,Eighth Edition (2017),1b,1a,14,-1,Black or African American,17.9472104,CNV_H,No,0.09,MSS,CNV_H,42,Mutated,WT,WT,WT,Mutated,0.114268749,5.35E-03,1.15E-20,0,3.46E-18,9.53E-19,0,4.21E-03,0,4.60E-19,0,3.97E-18,7.08E-03,0,1.94E-03,2.80E-20,3.98E-19,5.25E-18,0,5.70E-02,1.51E-02,1.88E-02,8.26E-03,3.66E-03,0,0,2.35E-03,2.32E-19,5.04E-18,7.90E-03,3.39E-02,1.23E-03,0,5.10E-02,0.148970878,0,1.73E-02,2.70E-18,1.73E-02,5.48E-02,0,7.14E-03,7.03E-03,0,0.222205039,0,3.44E-02,2.00E-02,0,5.87E-02,0,2.49E-02,7.91E-02,4.01E-02,0.274873476,0,8.01E-02,0,1.94E-02,6.02E-02,1.69E-02,18,20,0,0,0,0,0,0,0,0,0,0.21,NA,NA,-1.384619954,-0.314596071,-1.131237614,0.41941826,2.268785111,0.304576868,-0.13962301,-1.0273441,1.154439286,-0.551835445,-0.184775839,-0.463593241,0.806508233,0.519192186,2886.147055,3651.024917,6537.171972,Negative for malignancy/normal/benign,Not identified,Cannot be assessed,Not-Hispanic or Latino,Black,United States,Posterior endometrium,NA,Multifocal,0,0,4,0,0,0,0,0,Staging Incomplete,R0: No residual tumor,Positive : 65 %,Positive : 2 %,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record,Yes,Medical Record|Medical Record|Medical Record,Celebrex|guaifenesin|naproxen,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|Yes,Living|Living,298|449,Yes|Yes,Yes|Yes,Yes|No,Tumor Free|With Tumor,Never|Never,Former Therapy|Former Therapy,Never|Never,No|No,No|No,2|1,No|Unknown,Complete Remission|Persistent Disease,Complete Remission|n/a,n/a|Persistent 
Disease,No|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,298.0|449.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3L-05571-b1,C3L-05571,Yes,b1,4,131C,CPT0275270003,Tumor,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-05848,C3L-05848,No,b1,1,129C,CPT0288750003,Tumor,No,70,Female,Serous carcinoma,G3 Poorly differentiated,-2,165,53,19.3,Present,<50% myometrial invasion,Eighth Edition (2017),pT3a,pN0,2,IVB,White,23.02237376,CNV_H,No,0.13,MSS,CNV_H,190,Mutated,WT,WT,WT,Mutated,0.133982091,1.83E-02,6.14E-03,0,3.43E-18,3.22E-19,1.03E-02,3.83E-03,0,2.47E-19,0,1.46E-19,3.06E-02,5.40E-20,1.11E-02,3.50E-03,2.23E-19,0,8.39E-03,2.12E-02,4.08E-02,3.28E-02,1.15E-02,2.96E-03,9.85E-21,1.20E-02,6.68E-03,5.90E-03,0,0,2.13E-02,1.08E-03,0,0,0.23338744,1.62E-18,6.07E-02,1.75E-03,6.24E-02,0,0,4.49E-02,3.29E-02,0,0.209211775,0,4.59E-02,0,0,0,7.64E-02,0,3.31E-02,3.01E-02,0.247757337,4.39E-02,3.65E-02,0,9.03E-02,2.64E-02,8.27E-02,4,119,0,62,0,0,0,0,0,0,0,0.23,NA,NA,2.62E-02,1.056713158,-5.22E-02,-1.322776134,0.177598276,0.662110775,1.510200793,-0.162670933,0.9932458,0.442598882,1.480251765,0.922565198,-1.654268392,1.045739467,4756.659794,5475.906105,10232.5659,Not Examined,Not identified,Cannot be assessed,Not-Hispanic or Latino,White,Other: Unknown,Other,Endometrium,Unifocal,0,0,0,0,0,0,0,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,No,Absent,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,calcium carbonate/vitamin D3 |Naproxen|Propranolol |ibuprofen,NA,NA,NA,NA,Yes,2,12 Months,No,Deceased,153,No,Yes,No,With Tumor,Never,Unknown,Never,No,No,2,No,Persistent Disease,Persistent 
Disease,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,Other : Endometrial Carcinoma,153,n/a,n/a,n/a
+C3L-05849,C3L-05849,No,b3,10,131N,CPT0288830003,Tumor,No,64,Female,Serous carcinoma,G3 Poorly differentiated,95,165,80,29.35,Present,=50% myometrial invasion,Eighth Edition (2017),pT2,PN2a,6,IIIC2,Black or African American,15.89601897,CNV_H,No,0.01,MSS,CNV_H,59,WT,WT,WT,WT,WT,0.401764688,0.124726736,4.84E-02,1.37E-02,1.39E-02,2.96E-18,0.138651888,0,2.17E-02,6.00E-02,0,3.38E-02,2.59E-02,0,2.70E-02,5.99E-02,1.27E-19,5.25E-02,6.08E-20,6.29E-02,0.129562923,0.108230892,3.49E-02,3.94E-03,2.63E-02,0.121764701,1.93E-02,0,1.48E-19,2.94E-02,8.28E-02,8.09E-03,2.83E-03,0,0.152181899,4.43E-02,0.295071247,6.13E-02,0.356335552,2.21E-02,0,1.49E-02,4.36E-02,5.21E-03,0.257167925,3.77E-02,0,0,0,3.91E-02,0,1.16E-02,0.138619362,7.64E-02,0.273111105,0,2.31E-02,0,4.13E-02,4.02E-03,1.21E-02,21,35,0,0,0,0,0,0,0,0,0,0.18,Yes,No,2.57E-02,1.008324896,0.471269094,-0.759199592,2.307381318,0.73200949,2.19771132,0.604381722,0.10293743,0.185554515,1.98110734,2.310609056,-0.581266888,-0.113105162,6400.839955,8045.434992,14446.27495,Negative for malignancy/normal/benign,Present,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,Black,United States,Other,Endometrium,Unifocal,IHC staining not done,2,16,IHC staining not done,12,Unknown value,IHC staining not done,0,Staging Incomplete,R0: No residual tumor,Positive : 80 %,Positive : 80 %,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of expression,Yes,Absent,Yes,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,p16 - Diffusely reactive,Ki-67 highlights 70-80% of tumor,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical 
Record|Medical Record|Medical Record|Medical Record|Medical Record,amLODIPine|Cabergoline|Calcium carb-mag oxide-vit D3 |Citalopram|Potassium|RaNITIdine |TraZODone,NA,NA,NA,NA,Yes,2,12 Months,No,Living,385,No,Yes,No,Tumor Free,Never,Never,Never,Yes,Yes,1,No,Complete Remission,Complete Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,373,n/a,n/a,n/a,n/a,n/a
+C3N-00155,C3N-00155,No,b4,15,129N,CPT0104820003,Tumor,No,57,Female,Endometrioid carcinoma,G1 Well differentiated,0,165,126,46.28,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,0,Stage I,NA,0,CNV_L,No,0.84,MSS,CNV_L,175,WT,Mutated,WT,Mutated,Mutated,9.04E-02,1.04E-02,4.28E-19,1.99E-18,6.03E-18,0,0,0,0,0,0,7.75E-04,0,4.17E-18,6.60E-03,4.04E-03,5.37E-19,0,0,3.62E-02,2.42E-02,2.56E-02,8.79E-03,2.96E-03,3.61E-19,0,5.73E-03,1.39E-02,5.82E-18,2.85E-02,2.48E-02,5.47E-03,0,0.116365435,2.04E-03,0,3.87E-02,4.71E-03,4.34E-02,0,0,3.89E-02,8.17E-03,0,0.219529533,0,1.56E-02,1.87E-02,0,0,0.128373163,0,8.14E-02,1.38E-02,0.201141274,4.56E-02,3.43E-03,0,3.45E-02,0,0.190958924,10,52,72,0,0,0,0,0,0,0,0,0.17,NA,NA,-0.374553388,-0.289790261,1.121605755,-0.312409698,0.263218036,-1.451544947,0.786626642,0.235738782,0.431825143,-0.186178109,0.811559247,0.696742021,0.134654558,-2.37E-02,4354.35758,4945.367245,9299.724825,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Ukraine,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,18,Unknown,Unknown,Unknown,Unknown Tumor Status,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,n/a,Unknown,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a
+C3N-00520,C3N-00520,No,b2,7,131C,CPT0115000004,Tumor,No,58,Female,Endometrioid carcinoma,G1 Well differentiated,4,154,75,31,Present,=50% myometrial invasion,Seventh Edition (2010),pT3a (FIGO IIIA),pN0,8,Stage III,NA,0.388013171,CNV_L,No,0.01,MSS,CNV_L,39,WT,Mutated,Mutated,WT,WT,2.74E-02,0,7.46E-20,5.96E-20,3.38E-18,5.07E-19,2.35E-02,0,7.96E-19,0,3.36E-19,4.76E-03,0,0,2.36E-03,0,2.12E-18,7.51E-02,0,8.67E-02,9.14E-04,4.32E-03,2.33E-03,5.25E-03,1.45E-18,0,1.75E-19,3.38E-19,1.30E-19,9.22E-03,4.16E-03,1.85E-02,5.13E-18,7.26E-03,3.33E-18,0,5.68E-03,3.82E-02,4.39E-02,5.10E-02,0,0.118470449,3.79E-02,0,0.248511895,0,8.87E-02,0,0,0,3.56E-02,1.12E-02,4.18E-03,0,0.294056151,6.02E-02,0,5.02E-02,0,0,0,11,24,0,0,0,0,0,0,0,0,0,0.5,NA,NA,3.415209041,-0.56644536,0.164344889,-0.382919656,-1.122017538,-0.353273327,-1.091400146,0.61448874,-0.53563564,0.859388667,-0.860110525,-0.754691908,-0.876800191,0.616814696,4178.702317,3528.21591,7706.918227,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,NA,Yes,Medical Record|Medical Record|Medical Record,citrafleet|fragmin|kalipoz,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Deceased,120|638|974|1169,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|With Tumor,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,No|No|No|No,No|No|No|No,1|1|1|1,No|No|No|No,Complete Remission|Complete Remission|Complete 
Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|Persistent Disease,n/a|n/a|n/a|n/a,No|No|No|Yes,n/a|n/a|n/a|Distant Metastasis,n/a|n/a|n/a|Liver,n/a|n/a|n/a|1124.0,n/a|n/a|n/a|No,n/a|n/a|n/a|No,n/a|n/a|n/a|Yes,n/a|n/a|n/a|No,132.0|650.0|986.0|n/a,n/a|n/a|n/a|Malignant Neoplasm,n/a|n/a|n/a|1239.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-00732,C3N-00732,No,b2,7,127C,CPT0089600003,Tumor,No,56,Female,Endometrioid carcinoma,G1 Well differentiated,2.2,168,107,38.09,Not identified,NA,Seventh Edition (2010),pT1a (FIGO IA),pN0,16,Stage I,White,4.70E-03,CNV_L,No,0.05,MSS,CNV_L,55,WT,WT,Mutated,WT,Mutated,3.37E-02,2.83E-19,3.45E-04,5.27E-18,1.06E-18,1.54E-21,1.94E-02,1.15E-02,2.99E-02,4.19E-02,3.38E-18,2.59E-18,8.99E-03,1.63E-03,3.42E-03,1.73E-02,0,0.102854363,0,9.45E-02,2.99E-03,6.79E-03,1.68E-03,6.43E-03,7.62E-19,2.39E-03,0,1.54E-04,0,2.57E-03,6.77E-03,6.35E-03,0,2.29E-02,1.26E-02,1.68E-02,3.02E-02,6.01E-02,9.03E-02,5.81E-02,0,2.14E-02,4.12E-02,0,0.335995816,1.30E-02,0,0,2.17E-02,0,3.03E-02,3.33E-02,5.94E-02,6.76E-04,0.183489103,0,5.60E-03,0.172183947,0,2.35E-02,0,14,36,0,0,0,0,0,0,0,0,0,0.74,Yes,No,1.281583094,0.344033172,0.792653576,-0.851252327,-2.04783258,1.267562224,-0.537584745,1.140270672,-0.18848291,0.968124511,-0.408493069,-0.90066271,-0.992093506,2.390558613,4748.579197,3968.887673,8717.46687,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,white,United States,Anterior endometrium,NA,Multifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Perimenopausal: 6-12 months since last menstrual Period,Never,None,see below comment,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,albuterol|Duloxetine |insulin aspart|insulin 
glargine|meloxicam|pantoprazole|Ranitidine|simvastatin,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,411|795|1126|1393,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,None|None|None|None,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,365.0|749.0|1080.0|1347.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-00750,C3N-00750,No,b4,16,127C,CPT0265110003,Tumor,No,44,Female,Endometrioid carcinoma,G2 Moderately differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,White,4.069838891,CNV_L,No,0.03,MSS,CNV_L,122,WT,Mutated,Mutated,WT,WT,5.91E-04,0,1.96E-20,0,7.54E-18,0,0,0,0,0,1.79E-19,0,3.18E-03,7.77E-18,0,1.67E-18,1.57E-19,7.76E-03,8.65E-18,0,0,0,0,1.68E-03,5.45E-18,0,0,0,5.32E-18,7.51E-03,1.31E-02,1.97E-03,0,8.75E-02,6.41E-02,0,1.12E-03,6.00E-03,7.12E-03,0,0,3.45E-03,6.14E-03,0,0.282786595,0,0,7.64E-03,0,9.49E-02,0,0,0.313552183,0,0.21834091,0,7.32E-02,0,0,0,0,37,57,0,17,0,0,0,0,0,0,0,0.4,NA,NA,-0.560139313,0.799378482,0.345423557,2.295443066,-0.706807355,0.772592175,-0.346099661,1.473413909,1.838931664,5.65E-02,-1.10E-02,-1.185084524,1.204169953,0.738238381,2901.657976,2816.021258,5717.679233,NA,NA,NA,Hispanic or Latino,Hispanic or Latino,Other: Unknown,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-00753,C3N-00753,No,b4,14,127C,CPT0129640003,Tumor,No,51,Female,Endometrioid carcinoma,G3 Poorly differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Asian,5.439517525,CNV_L,No,23.27,MSI-H,MSI-H,1855,WT,Mutated,WT,Mutated,Mutated,0.248709586,6.63E-02,7.99E-03,2.86E-03,2.08E-20,1.11E-18,7.44E-03,8.90E-03,2.77E-02,2.68E-02,1.48E-02,3.33E-02,2.31E-02,0,2.19E-02,9.14E-03,1.14E-18,1.06E-02,5.35E-19,1.21E-02,3.64E-02,3.96E-02,1.17E-02,9.26E-04,1.28E-18,2.60E-03,0,0,0,2.29E-02,7.79E-02,1.32E-02,9.71E-18,0.113754093,0.207907802,0,0.103872701,1.14E-02,0.115266428,3.03E-02,0,5.79E-02,0.109810609,0,0.152099328,6.13E-02,8.53E-02,0,0,1.98E-02,1.43E-02,0,8.99E-02,8.95E-02,0.230033669,0,1.65E-02,0,2.99E-02,0,1.34E-02,0,42,710,0,0,0,0,0,262,0,381,0.28,NA,NA,-1.166468405,1.359253342,-0.549850734,-0.117554452,1.899269333,1.326601228,0.773041072,0.10522105,0.147770966,9.95E-02,0.705746679,-0.136648547,0.47250482,8.26E-02,4933.254317,6033.839712,10967.09403,NA,NA,NA,Not-Hispanic or Latino,Non-Hispanic,Other: unknown,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-00755,C3N-00755,No,b2,7,130C,CPT0129520003,Tumor,No,42,Female,Endometrioid carcinoma,G1 Well differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Not Reported,0,CNV_L,No,0,MSS,CNV_L,31,WT,Mutated,WT,Mutated,Mutated,1.88E-02,4.03E-03,0,8.55E-19,6.90E-18,3.67E-02,7.13E-02,1.49E-18,0,1.91E-20,0,3.25E-02,2.95E-18,0,1.00E-02,0,3.19E-02,7.43E-02,0,1.23E-02,4.09E-19,7.68E-04,1.50E-03,6.88E-03,8.69E-18,0,0,0,6.51E-19,6.78E-02,1.77E-02,6.61E-03,0,2.35E-02,0,0,3.52E-02,3.87E-02,7.39E-02,3.37E-02,0,7.68E-02,0.102958021,0,0.364702102,0,1.27E-02,7.07E-03,0,5.21E-02,0,5.63E-03,5.47E-02,9.28E-03,5.54E-02,3.49E-02,2.23E-02,0.114542263,0,0,5.32E-02,5,10,0,0,2,5,5,0,0,0,0,0.21,NA,NA,0.317408249,0.909689511,1.35503569,-0.596165032,-0.472720313,-1.34E-02,0.274011505,0.650156366,-1.078573686,1.91E-02,0.5196641,0.597370721,-0.380419227,0.797239601,3799.267126,4355.661811,8154.928937,NA,NA,NA,Hispanic or Latino,Hispanic or Latino,Other: unknown,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-00859,C3N-00859,No,b4,16,130N,CPT0116010004,Tumor,No,60,Female,Other,G2 Moderately differentiated,9,170,121,41,Present,=50% myometrial invasion,Eighth Edition (2017),pT3b,pN0,5,Stage IIIB,NA,2.03E-03,CNV_L,No,0.01,MSS,CNV_L,85,WT,WT,Mutated,WT,WT,5.07E-02,0,3.43E-20,1.68E-18,1.43E-18,0,0,0,3.57E-18,0,0,0,0,0,0,3.77E-03,0,0.147814991,0,3.37E-02,1.85E-03,1.50E-02,0,5.89E-03,0,0,0,0,5.68E-18,6.13E-19,1.92E-02,8.06E-03,0,5.64E-02,9.50E-02,0,5.16E-03,7.99E-02,8.50E-02,0,0,6.99E-02,4.36E-02,0,0.166125082,0,8.13E-02,0,2.13E-02,0,7.75E-02,4.08E-02,0,1.80E-02,0.346618089,0,5.73E-02,7.76E-02,0,0,0,30,52,0,0,0,0,0,0,0,0,0,0.35,NA,NA,-0.355841341,0.949596302,0.509884697,0.344707027,7.43E-02,0.829933884,-0.501319963,1.379621481,0.790181976,0.634357178,-0.2743769,-0.293629283,0.264688355,-0.430693492,4213.747375,3389.428758,7603.176133,Negative for malignancy/normal/benign,Not identified,Cannot be assessed,NA,Caucasian,Poland,Fundus,NA,Multifocal,0,0,0,0,0,0,0,0,0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,4 or more,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Offtensin|Tritace|Clexane|Exacyl|cyclonamine|Potassium Chloride|fortrans,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,377|722|1056|1440,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,No|No|No|No,4 or more|4 or more|4 or more|4 or more,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a|n/a,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,392.0|737.0|1071.0|1455.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-00860,C3N-00860,No,b4,15,128C,CPT0116140004,Tumor,No,67,Female,Endometrioid carcinoma,G1 Well differentiated,5.5,154,76,32,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,NA,2.16E-03,CNV_L,No,0.04,MSS,CNV_L,97,WT,Mutated,WT,WT,Mutated,3.96E-02,0,0,5.59E-19,1.38E-17,1.16E-02,5.34E-03,1.05E-18,9.96E-19,0,0,1.78E-02,2.00E-04,0,7.17E-03,1.42E-02,1.59E-19,3.14E-02,0,7.35E-02,0,3.64E-04,3.80E-03,7.39E-03,0,0,3.71E-19,0,1.50E-18,1.24E-02,1.76E-02,1.79E-03,3.86E-18,5.11E-02,2.07E-18,0,9.71E-03,2.28E-02,3.25E-02,0,2.04E-02,6.12E-02,4.07E-02,0,0.256280046,0,0,1.42E-02,0,0,5.78E-02,4.53E-04,4.17E-02,7.67E-03,0.396138995,1.81E-02,3.86E-03,7.01E-02,0,0,1.13E-02,15,39,0,0,0,12,20,0,0,0,0,0.45,NA,NA,0.663451845,-1.042030566,0.261304497,-1.286492731,-0.609085537,-1.379859068,-0.580944902,1.044305932,-0.291931793,-0.480919047,-0.294376313,0.96234935,-0.962555868,-0.250919023,3751.168781,3712.832239,7464.00102,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Absent,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Nebicard|Vanatex HCT|Fragmin|Potassium Chloride|fortrans,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|Yes,Living|Living,351|451,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete 
Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,364.0|464.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-00872,C3N-00872,No,b3,11,130N,CPT0116250004,Tumor,No,75,Female,Endometrioid carcinoma,NA,5.5,158,74,29.64,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,6,Stage I,NA,0,CNV_L,No,0.05,MSS,CNV_L,60,WT,Mutated,WT,Mutated,WT,7.26E-02,1.97E-02,0,0,8.19E-18,3.12E-18,4.45E-02,6.47E-04,7.72E-03,8.32E-03,0,7.96E-04,2.12E-03,0,3.18E-03,3.99E-19,0,2.10E-02,0,3.23E-02,1.24E-20,2.52E-04,4.25E-03,5.53E-03,0,1.02E-17,4.54E-03,0,9.92E-18,3.62E-02,1.02E-02,4.00E-02,6.16E-18,6.82E-02,5.63E-05,1.13E-02,2.41E-02,1.05E-02,3.46E-02,1.66E-02,0,0.157788327,0.127586723,0,0.205252977,0,4.06E-02,0,0,2.66E-02,0,6.66E-03,0.195866958,2.12E-02,0.136866067,0,0,0,6.51E-02,0,0,15,21,0,0,0,8,13,0,0,0,0,0.26,Yes,No,0.529870461,-0.269327231,0.954137734,1.055162237,-0.83937678,-0.765850621,-0.298832009,0.342006037,4.76E-02,-0.214148673,-0.213604886,0.277916899,0.107628576,-7.73E-02,3109.754015,3623.267678,6733.021693,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Other,Uterine cavity,Multifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,No,NA,NA,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,399|651|1106|1554,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,411.0|663.0|1118.0|1566.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01007,C3N-01007,No,b1,2,130C,CPT0095100003,Tumor,No,59,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.5,164,92,34,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,4,Stage I,NA,0,CNV_L,No,7.81,MSI-H,MSI-H,1075,Mutated,Mutated,Mutated,Mutated,Mutated,0.126893776,3.37E-02,7.71E-03,1.67E-18,0,0,2.01E-02,2.38E-03,1.60E-02,4.23E-02,0,4.58E-19,0,1.94E-19,3.46E-03,1.02E-18,0,0,0,9.63E-03,1.42E-02,9.47E-03,1.30E-02,4.31E-03,2.61E-03,2.89E-03,1.04E-02,2.52E-03,1.01E-17,2.27E-02,2.67E-02,1.31E-02,0,5.48E-02,0.129531956,1.54E-02,5.14E-02,5.10E-19,5.14E-02,1.92E-03,0,3.09E-02,5.85E-02,0,0.221617916,0,7.77E-02,0,7.21E-04,0,5.70E-02,0,0.1428905,0.123243905,0.125879396,3.05E-04,0,0,6.27E-02,9.09E-03,8.75E-02,409,154,454,0,0,0,0,0,0,0,0,0.7,NA,NA,-0.160785511,-0.928428143,-2.30E-02,0.660467572,1.092692701,-0.545833911,0.345920518,-0.731912121,9.64E-02,-1.218773649,0.316729301,-0.378349943,0.207863952,0.625548468,2972.189086,4667.48303,7639.672116,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : 70 %,Positive : 50 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Bisocard|Axtil|Aldac|Concor|Hydroxyzin|Fragmin,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|Yes,Living|Living,364|364,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Unknown|Unknown,Never|Never,Yes|Yes,No|No,2|2,No|No,Complete Remission|Complete 
Remission,Complete Remission|Complete Remission,n/a|n/a,No|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,376.0|376.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01008,C3N-01008,No,b1,3,127C,CPT0095130004,Tumor,No,68,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,156,98,40,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,8,Stage I,NA,0,CNV_L,No,0.16,MSS,CNV_L,91,WT,Mutated,Mutated,WT,WT,4.80E-02,5.13E-03,0,2.66E-18,2.25E-18,1.07E-18,0,4.87E-04,1.57E-19,8.22E-03,7.01E-18,1.18E-02,1.35E-02,5.94E-03,2.78E-03,2.39E-19,1.39E-18,0,1.21E-18,1.64E-02,2.89E-03,5.78E-03,7.59E-04,5.50E-03,0,3.87E-18,3.16E-03,1.53E-19,3.71E-18,2.21E-02,1.63E-02,9.91E-04,0,2.86E-02,0.10970217,1.50E-03,1.09E-02,1.20E-19,1.09E-02,0,0,2.19E-02,5.18E-02,0,0.261957218,0,0,0,0,3.47E-02,5.64E-02,1.47E-02,4.50E-02,3.82E-03,0.276429343,6.89E-02,4.03E-02,0.124152219,0,0,0,30,56,0,0,0,0,0,0,0,0,0,0.84,NA,NA,0.189670667,0.266224453,-1.693862585,-0.210503442,-0.403409309,0.750945395,-0.912400656,0.454799384,2.131324119,-0.17124993,-0.611319975,-1.270733903,1.098963889,0.990166908,2920.666306,3286.44014,6207.106445,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,0,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 90 %,Positive : 50 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,Ki67 positive 40%,NA,No,Lifelong non-drinker,Current smoker: Includes daily and non-daily smokers,20,NA,20,48,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record,Bespres|Polocard|Clexane,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|Yes,Living|Living,347|318,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Unknown|Unknown,Never|Never,Yes|Yes,No|No,1|1,No|No,Complete Remission|Complete Remission,Complete Remission|Complete 
Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,371.0|342.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01079,C3N-01079,No,b1,2,131N,CPT0211120004,Tumor,No,77,Female,Endometrioid carcinoma,G1 Well differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2.19E-02,CNV_L,No,0.27,MSS,CNV_L,56,Mutated,WT,WT,WT,WT,8.68E-02,0,0,6.04E-19,0,0,2.51E-02,4.41E-03,0,2.24E-02,9.63E-19,0,2.45E-02,0,5.64E-03,2.45E-02,5.71E-19,0.104588958,6.58E-18,0.100437458,7.48E-03,4.16E-03,9.49E-03,2.43E-03,0,0,0,5.97E-20,0,0,1.40E-02,1.40E-02,4.55E-18,1.73E-02,0.115950423,0,1.04E-02,6.90E-02,7.94E-02,4.81E-02,0,0.100444197,0,0,0.32654871,0,5.06E-03,0,5.34E-02,0,5.23E-02,4.26E-04,2.91E-02,3.41E-02,0.220740066,2.64E-02,2.91E-03,7.92E-02,0,0,2.13E-02,13,30,0,0,0,0,11,0,0,0,0,1,NA,NA,0.680457885,1.045625842,1.572270619,0.379230172,-0.920658346,1.537915387,-0.243829645,-0.98072042,-1.617707419,0.251512131,-0.295053867,-0.200529493,-0.185502919,0.728184302,4836.138544,3845.676546,8681.815089,NA,NA,NA,NA,Caucasian,Ukraine,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01171,C3N-01171,No,b3,10,128C,CPT0095190003,Tumor,No,82,Female,Endometrioid carcinoma,G2 Moderately differentiated,4,164,88,32,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,NA,0,CNV_L,No,18.81,MSI-H,MSI-H,685,Mutated,Mutated,WT,Mutated,Mutated,0.111760853,1.93E-02,4.43E-03,3.30E-19,2.38E-21,1.33E-19,1.03E-02,8.39E-03,1.04E-02,1.73E-02,0,8.99E-03,1.92E-02,0,6.55E-03,8.34E-03,0,0,5.48E-18,1.13E-02,1.42E-02,1.45E-02,4.88E-03,1.30E-03,1.48E-18,1.03E-02,7.66E-03,1.86E-04,1.72E-18,7.80E-03,2.93E-02,3.39E-03,0,5.65E-02,0.145371868,0,4.16E-02,4.17E-03,4.58E-02,0,0,6.39E-02,0.150286416,0,0.130687333,0,8.47E-02,1.26E-02,0,0,7.15E-02,1.09E-02,0.120323693,8.21E-02,0.132281152,8.38E-03,0,0,9.64E-02,0,3.59E-02,48,12,134,0,0,0,126,92,0,0,60,0.59,Yes,No,-0.674945537,1.373998424,-3.64E-02,1.228499994,1.368713733,1.441871718,1.457994086,0.375543651,1.1153636,-0.1715209,1.317100488,0.189389964,-0.122353855,1.268667507,3989.733683,5208.547429,9198.281112,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Multifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 70 %,Positive : 50 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record,glucophage|sotahexal|Diuver|cardura|milurit|atoris|hyzaar forte|Polocard,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Deceased,492|596|1009,No|No|No,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,Yes|Yes|Yes,None|None|None,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,505.0|609.0|n/a,n/a|n/a|Cardiovascular Disorder,n/a|n/a|1035.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-01172,C3N-01172,No,b2,8,131N,CPT0095230003,Tumor,No,57,Female,Endometrioid carcinoma,G2 Moderately differentiated,2.5,165,80,29,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,1,Stage I,NA,0,CNV_L,No,20.34,MSI-H,MSI-H,553,Mutated,Mutated,WT,Mutated,Mutated,1.47E-19,8.06E-03,8.59E-04,9.18E-19,5.36E-18,0,7.82E-19,1.00E-19,0,0,3.70E-18,1.98E-03,2.91E-02,0,0,0,0,0,0,1.70E-02,1.14E-03,2.69E-19,0,2.25E-04,2.09E-21,0,5.78E-03,1.14E-19,0,5.12E-02,1.30E-02,6.40E-03,1.77E-18,9.11E-02,6.21E-02,0,6.28E-03,3.38E-20,6.28E-03,0,0,9.63E-03,2.72E-02,0,0.317302471,0,7.37E-02,0,0,0,2.35E-04,5.18E-02,0.149642383,2.74E-02,7.01E-02,1.82E-03,5.85E-02,0,0.134244772,1.54E-02,6.30E-02,35,67,92,0,0,0,95,99,0,0,0,0.83,NA,NA,-0.946346098,0.625565314,-1.265186151,1.570568446,-0.437061472,1.262791341,0.684931362,-0.874411812,0.500136872,-1.360868182,0.801243699,-2.136479043,2.218294711,0.800431099,1771.707456,3149.409592,4921.117048,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,0,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 60 %,Positive : 50 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Normal,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record,Yes,Medical Record,Fragmin,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,555|709|961|1479,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,No|No|No|No,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete 
Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,568.0|722.0|974.0|1492.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01341,C3N-01341,No,b1,4,131N,CPT0095280003,Tumor,No,64,Female,Endometrioid carcinoma,G2 Moderately differentiated,5,168,95,33,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,9,Stage I,NA,9.78E-04,CNV_L,No,0,MSS,CNV_L,66,WT,Mutated,WT,Mutated,Mutated,8.64E-02,1.87E-02,6.33E-21,9.73E-19,6.85E-20,6.53E-19,7.82E-03,1.84E-19,2.75E-18,1.43E-02,4.32E-19,1.37E-02,7.24E-03,0,4.12E-03,1.53E-03,1.94E-19,4.93E-03,2.11E-18,2.49E-02,8.86E-03,8.82E-03,4.71E-04,1.02E-03,5.18E-19,2.79E-04,2.78E-03,7.02E-20,1.77E-18,1.13E-03,2.93E-02,8.05E-03,1.00E-19,7.09E-02,7.08E-03,0,2.20E-02,4.65E-03,2.67E-02,0,3.62E-03,0.10405177,4.75E-02,0,0.156197673,0,6.11E-02,0,0,0,0.123430266,9.08E-02,0,3.38E-03,0.263868097,2.58E-02,7.42E-02,3.96E-02,0,0,6.60E-03,27,32,0,0,0,0,0,0,0,0,0,0.66,NA,NA,0.478038492,0.102807066,0.239316893,3.78E-02,-0.283586538,0.603740038,8.86E-02,1.517526455,-9.23E-04,-0.251237819,0.331323806,0.163221202,-0.476046881,-0.253778156,3896.851759,4284.434815,8181.286574,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Multifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : 70 %,Positive : 50 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Berotec|Tezeo|Cipronex|Fragmin,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|Yes,Living|Living,343|343,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete 
Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,353.0|353.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01342,C3N-01342,No,b1,1,130C,CPT0088140003,Tumor,No,68,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.6,164,95,35,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,11,Stage I,NA,0,CNV_L,No,0.03,MSS,CNV_L,59,WT,Mutated,Mutated,WT,WT,4.83E-02,2.29E-03,1.25E-19,0,8.32E-18,0,2.65E-02,0,9.06E-21,6.77E-04,0,1.06E-19,1.35E-03,2.31E-18,5.66E-21,0,2.19E-18,0,0,5.08E-03,9.75E-04,3.45E-03,1.07E-03,4.50E-03,3.48E-19,1.95E-18,2.58E-03,0,7.04E-18,8.99E-03,8.73E-03,8.96E-03,0,7.73E-02,3.45E-02,0,5.18E-03,6.51E-04,5.83E-03,0,0,0,4.73E-02,0,0.329401231,0,0,3.76E-02,0,0,0.110133339,7.10E-03,0,5.30E-02,0.249179947,0,2.93E-02,9.22E-02,0,0,4.49E-02,18,28,0,0,0,0,0,0,0,0,0,0.92,NA,NA,0.678806316,8.62E-02,-0.351367591,-0.266862729,-0.524670381,0.507325205,-0.994790741,0.174274834,-0.248611771,-0.28194513,-0.92349606,-0.96983554,1.933994758,0.47664936,2743.521331,2888.741889,5632.263219,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Multifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Concor|Axtil|Ipress Long|Lecalpin|Captopril|Meloxicam,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,233|702|1063|1329,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,242.0|711.0|1072.0|1338.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01344,C3N-01344,No,b2,8,130N,CPT0116340004,Tumor,No,80,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,150,107,47,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,10,Stage I,NA,4.89E-03,CNV_L,No,0.05,MSS,CNV_L,83,WT,Mutated,Mutated,Mutated,Mutated,6.35E-02,0,1.39E-18,2.55E-18,7.34E-18,0,3.44E-19,0,0,4.41E-18,0,0,0,0,6.07E-20,6.22E-19,8.47E-19,0,6.08E-19,2.54E-02,0,2.68E-03,4.99E-20,1.88E-05,4.28E-19,0,0,4.51E-19,7.65E-18,3.57E-04,2.69E-02,1.75E-03,0,7.77E-02,3.99E-19,9.22E-18,1.25E-05,1.67E-03,1.68E-03,0,0,1.88E-02,1.86E-02,0,0.184320429,0,7.32E-02,0,0,8.96E-03,5.74E-02,7.98E-02,0.106922609,1.56E-02,0.207896215,0,0.123534418,0,3.42E-02,7.49E-03,6.33E-02,36,24,0,16,0,0,0,0,0,0,0,0.65,Yes,No,1.010584357,0.958917619,-0.122092578,0.591618951,1.71548221,0.83661242,0.373454018,1.909639803,-0.407318649,0.19433427,0.53145782,-1.059607454,5.48E-02,1.491327304,2816.325831,3437.612906,6253.938737,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Multifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,oxycordil|diuresin|doxor|Clexane|kalipoz|fortrans|Cipronex|Potassium Chloride,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 
Months,No|No|No|No,Living|Living|Living|Deceased,424|654|1011|1011,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,448.0|678.0|1035.0|n/a,n/a|n/a|n/a|Respiratory Disease,n/a|n/a|n/a|1359.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01525,C3N-01525,No,b1,4,129C,CPT0095880003,Tumor,No,77,Female,Endometrioid carcinoma,G2 Moderately differentiated,2,162,101,38.48,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,NA,6.56E-02,CNV_L,No,0.01,MSS,CNV_L,69,WT,Mutated,WT,Mutated,Mutated,0.113347672,0.138089515,7.26E-04,2.62E-02,0,8.19E-04,6.67E-02,3.50E-03,1.89E-02,1.32E-02,1.31E-19,3.48E-02,7.21E-03,0,1.22E-02,1.58E-02,0,1.90E-02,3.28E-20,5.74E-02,9.95E-03,1.08E-02,8.24E-03,9.63E-03,2.18E-02,2.38E-02,2.43E-02,2.24E-02,1.02E-19,4.41E-02,3.23E-02,1.70E-02,0,9.39E-03,4.00E-02,0,0.15660197,1.74E-02,0.174028141,6.38E-03,6.54E-02,6.88E-02,6.37E-02,0,0.338442194,0,1.62E-02,3.19E-02,0,1.67E-02,5.76E-03,0,3.46E-02,2.58E-02,0.159190306,0,6.96E-03,0,1.46E-02,0,0.145579711,22,38,0,0,0,0,0,0,0,0,0,0.24,NA,NA,0.968455041,0.116985024,0.586825416,-1.830026394,0.652835063,-0.358812128,1.262459572,1.604640792,-1.190027371,7.66E-03,1.127224327,2.7023317,-2.335287933,1.259273466,4455.437792,5641.829234,10097.26703,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 50 %,Positive : 30 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,Cytokeratin 7 - positive; Vimentin - positive;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,No,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,221|678,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,No|No,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete Remission|Complete 
Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,206.0|663.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01526,C3N-01526,No,b1,2,129N,CPT0096010003,Tumor,No,66,Female,Other,G1 Well differentiated,1.5,163,120,45.17,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,9,Stage I,NA,1.44E-02,CNV_L,No,0.18,MSS,CNV_L,46,WT,Mutated,Mutated,WT,Mutated,4.79E-02,8.74E-03,2.18E-03,8.02E-18,0,1.50E-02,2.25E-02,8.64E-04,8.43E-04,1.38E-02,4.71E-18,1.20E-02,1.30E-02,1.30E-03,1.40E-04,3.54E-19,1.04E-03,7.72E-18,1.41E-17,4.30E-03,1.20E-02,1.28E-02,2.26E-03,8.05E-03,0,0,4.29E-03,3.69E-19,0,3.37E-02,7.09E-02,1.35E-02,3.66E-18,6.71E-02,5.27E-02,8.39E-03,2.05E-02,4.04E-18,2.05E-02,2.07E-02,0,2.84E-03,0.112389797,0,0.253080028,0.109388855,2.77E-03,0,0,0.140858162,0,4.29E-02,1.87E-02,6.00E-02,0.168718426,1.14E-02,0,0,4.26E-02,1.37E-02,0,11,18,12,0,0,0,0,0,0,0,0,0.75,NA,NA,-0.602082745,-0.32923972,0.335663418,-0.711535885,2.75E-02,0.266371451,-0.389968954,0.170962117,0.524567535,-0.974226972,-0.49249463,-0.756494607,-0.616693079,0.192387266,2830.616714,3480.307943,6310.924657,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 25 %,Positive : 45 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,Cytokeratin 7 - positive; Vimentin - positive;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,No,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,206|663,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete Remission|Complete 
Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,206.0|663.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01529,C3N-01529,No,b3,10,130C,CPT0097440003,Tumor,No,70,Female,Endometrioid carcinoma,G2 Moderately differentiated,2,161,77,29.71,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,NA,1.420667785,CNV_L,No,0.28,MSS,CNV_L,77,Mutated,WT,WT,Mutated,Mutated,6.98E-03,4.57E-03,0,0,2.18E-18,4.33E-03,0,0,2.53E-18,0,1.41E-18,6.17E-03,3.70E-04,8.82E-03,3.04E-19,1.52E-18,4.88E-19,0,0,1.98E-02,4.35E-18,1.53E-19,0,1.55E-02,2.79E-18,5.69E-18,2.68E-03,5.93E-19,0,5.64E-02,2.14E-02,2.04E-02,1.78E-18,4.84E-02,6.97E-02,0,1.34E-02,7.58E-19,1.34E-02,0,0,6.18E-03,4.62E-02,0,0.283628823,0,0,5.17E-03,0,3.31E-02,1.99E-02,2.27E-02,0.372928636,1.85E-03,0.191768164,0,6.01E-03,0,0,1.05E-02,0,15,39,0,0,0,0,17,0,0,0,0,0.8,NA,NA,-0.350698157,-1.787253484,0.375408379,-0.430807066,-0.988606273,-1.114672298,-1.083843371,-0.533844894,-0.259319555,-0.962619281,-1.003937609,-0.156204933,0.935634718,-1.285862678,1635.195132,2630.987183,4266.182316,Not Examined,Present,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 80 %,Positive : 90 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,CK7 - positive; Vimentin - positive;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months,Yes,Living,27,Unknown,Unknown,Unknown,Unknown Tumor Status,Unknown,Unknown,Unknown,Unknown,Unknown,2,Unknown,Unknown,Unknown,n/a,Unknown,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a,n/a
+C3N-01761,C3N-01761,No,b3,9,128N,CPT0116710003,Tumor,No,69,Female,Endometrioid carcinoma,G3 Poorly differentiated,4.5,162,72,27,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,12,Stage I,NA,2.000247062,CNV_L,No,22.53,MSI-H,MSI-H,624,WT,Mutated,WT,Mutated,WT,7.35E-02,9.41E-03,0,7.30E-18,1.35E-18,9.69E-19,3.91E-19,5.84E-20,2.64E-18,0,2.77E-18,2.02E-03,1.06E-02,0,9.71E-23,2.61E-03,4.00E-19,0,2.79E-18,2.06E-02,0,6.01E-04,4.02E-04,1.98E-04,3.67E-18,2.57E-17,0,0,6.10E-18,2.31E-02,1.01E-02,2.14E-02,0,0.148230371,0.133913,2.43E-18,6.41E-03,1.31E-03,7.71E-03,4.92E-02,0,0.108184728,0.122038116,0,0.221708232,0,0.118366604,0,0,0,5.94E-02,7.06E-03,4.36E-02,0.112296479,9.26E-02,0,0,0,8.40E-03,3.56E-03,5.36E-02,58,71,0,0,0,0,160,65,0,0,53,0.69,NA,NA,-0.594911418,1.032012483,-1.096912147,1.401428603,0.9708803,1.339458804,0.157450776,-0.785718131,1.358407618,-1.262712613,-9.68E-03,-0.903377582,0.971890694,-0.277578463,2707.093154,4003.392608,6710.485762,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Multifocal,0,0,1,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,Vimentine positive,NA,No,Lifelong non-drinker,Current smoker: Includes daily and non-daily smokers,Unknown,NA,20,NA,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Bisocard|milurit|effox long|diuresin|Fragmin|fortrans|kalipoz,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 
Months,No|No|No|No,Living|Living|Living|Living,244|839|979|1338,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|With Tumor|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Unknown|Never,Yes|Yes|Yes|Yes,No|No|No|No,1|1|1|1,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,Yes|Yes|Yes|Yes,New Primary Tumor|New Primary Tumor|New Primary Tumor|New Primary Tumor,Other: Kidney|Other: Kidney|Other: Kidney|Other: kidney,113.0|113.0|113.0|113.0,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,No|No|No|No,264.0|859.0|999.0|1358.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,113.0|113.0|113.0|113.0,Surgical Resection|Surgical Resection|Surgical Resection|Surgical Resection,R0: No Residual Tumor|R0: No Residual Tumor|R0: No Residual Tumor|R0: No Residual Tumor
+C3N-01764,C3N-01764,No,b2,7,128C,CPT0181320003,Tumor,No,73,Female,Serous carcinoma,G3 Poorly differentiated,6,158,75,30,Present,<50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN0,11,Stage II,NA,14.69558014,CNV_H,No,7.00E-02,MSS,CNV_H,64,Mutated,WT,WT,WT,Mutated,0.190726861,7.55E-02,1.67E-02,3.89E-03,1.96E-03,8.26E-18,3.27E-03,9.01E-03,1.74E-02,1.01E-02,5.00E-03,0,3.07E-02,0,1.14E-02,1.81E-18,0,8.66E-18,0,1.54E-02,3.38E-02,2.26E-02,2.21E-02,1.86E-03,3.26E-03,4.18E-03,1.05E-02,1.17E-20,1.18E-03,9.15E-03,2.42E-02,2.01E-02,9.29E-18,3.68E-02,0.25858588,7.24E-19,9.81E-02,5.24E-18,9.81E-02,7.31E-02,0,4.77E-02,0.108187785,0,0.114452992,3.87E-02,9.76E-02,0,0,0,5.85E-02,4.87E-03,4.11E-02,0.110210276,0.208625266,0,5.27E-02,0,1.94E-02,1.31E-02,1.18E-02,12,36,0,12,0,0,0,0,0,0,0,0.35,NA,NA,-0.286215717,0.380558766,-0.228724485,-0.203270537,2.099036814,8.69E-02,0.859025866,-0.737063438,5.87E-03,-0.429555029,0.62996958,-0.156939859,-2.61E-02,0.838316501,3940.012075,5240.349808,9180.361883,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Other,Isthmus of uterine corpus and cervical canal,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : 10 %,Positive : 10 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,"Ki67 positive 50%, Vimentin positive, SMA negative, CD10 - positive/negative, Desmine- negative, Bcl-2 - negative, EMA - positive/negative, CK7 positive, CK8/18 - positive, CK19 - positive",NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record|Medical Record,Bisocard|Euthyrox|Fragmin|Fortrans|Natrium kalium|Kalipoz|Aldac,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,386|816|1201|1346,Yes|Yes|No|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,406.0|836.0|1221.0|1366.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01765,C3N-01765,No,b1,4,130C,CPT0116940003,Tumor,No,58,Female,Endometrioid carcinoma,G2 Moderately differentiated,8,158,65,26,Present,=50% myometrial invasion,Seventh Edition (2010),pT3a (FIGO IIIA),pN1 (FIGO IIIC1),6,Stage III,NA,3.70E-03,CNV_L,No,14.61,MSI-H,MSI-H,771,WT,Mutated,Mutated,Mutated,Mutated,0.246545802,1.52E-02,6.86E-03,2.69E-18,0,1.69E-18,4.85E-02,1.17E-02,8.94E-02,0.114629111,4.22E-03,4.72E-03,1.16E-02,0,7.04E-03,0,2.69E-18,3.92E-03,0,1.81E-02,2.25E-03,1.11E-02,6.02E-03,2.77E-03,0,1.79E-18,5.35E-03,0,5.69E-18,1.66E-02,3.23E-02,2.10E-02,1.10E-18,8.01E-02,0.142359302,0,7.78E-02,1.96E-03,7.97E-02,1.97E-02,0,6.14E-02,0.134433128,0,0.261287416,2.98E-03,8.83E-02,4.97E-03,8.31E-02,0,4.02E-02,0,6.18E-02,9.54E-02,0.117614713,0,0,2.87E-02,0,0,0,95,176,0,0,0,0,160,0,72,0,123,0.4,NA,NA,0.138864277,0.316001016,0.692017961,0.825944117,0.30805158,-3.20E-02,-0.799914545,-0.424941507,-0.125753229,0.750546401,-0.955156205,0.173068789,-0.561494049,-5.88E-02,5013.704003,5217.161207,10230.86521,Not Examined,Present,Cannot be assessed,NA,Caucasian,Poland,Fundus,NA,Unifocal,IHC staining not done,0,2,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,metronidazol|Cipronex|Fragmin|kalipoz,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,259|484|988|1352,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,No|No|No|No,No|No|No|No,3|3|3|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,271.0|496.0|1000.0|1364.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01766,C3N-01766,No,b1,2,128C,CPT0226810003,Tumor,No,63,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,165,108,39,Not identified,NA,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,NA,0,CNV_L,No,0.01,MSS,CNV_L,52,WT,Mutated,WT,Mutated,Mutated,9.03E-02,0,0,0,8.36E-19,0,0,2.13E-03,0,8.21E-03,0,0,2.16E-02,2.59E-19,3.61E-03,2.73E-02,2.60E-19,0.143591591,0,0.125117983,3.23E-02,2.27E-02,1.35E-02,1.76E-03,4.41E-18,4.62E-03,0,3.30E-03,1.29E-17,3.01E-03,2.17E-02,0,0,4.92E-02,9.14E-02,0,3.04E-02,9.03E-02,0.12067751,0,0,5.74E-02,3.91E-02,0,0.136205481,0,2.06E-02,0,0,3.28E-02,4.07E-02,6.15E-02,8.77E-03,4.84E-03,0.360359682,4.23E-02,2.23E-02,4.40E-02,6.55E-03,1.93E-02,0.103249658,23,24,0,0,0,0,0,0,0,0,0,0.33,Yes,No,-0.212399685,-0.179274308,0.514966703,-0.1912367,-0.549840059,-1.46E-02,0.326598797,0.865885353,-1.060837629,9.72E-02,0.406557973,0.163027314,-0.82872889,0.756589656,5172.775259,4941.366454,10114.14171,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Euthyrox|Nebilet|Metformax|Gensulin R|Gensulin N|Jardiance|Pramolan|Valsacor|Tertensif SR|Nebilet|Dulsevia,NA,NA,NA,NA,Yes,1,12 Months|24 
Months,No|Yes,Living|Living,372|520,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,Yes|Yes,3|2,No|No,Complete Remission|Complete Remission,Complete Remission|Unknown,n/a|n/a,No|Unknown,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,372.0|520.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01830,C3N-01830,No,b3,12,128N,CPT0097300003,Tumor,No,73,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.5,163,72,27.1,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,5,Stage I,NA,0.121195139,CNV_L,No,0.11,MSS,CNV_L,67,WT,Mutated,Mutated,Mutated,WT,1.16E-18,2.43E-03,7.51E-20,0,0,1.91E-18,1.98E-18,9.54E-03,1.69E-02,2.20E-02,0,0,6.54E-03,1.87E-18,4.08E-21,1.19E-03,8.84E-19,5.68E-02,1.48E-18,2.47E-02,0,0,7.39E-04,4.06E-03,1.48E-19,2.26E-17,3.67E-03,1.37E-18,0,0,2.56E-02,1.10E-02,8.69E-19,2.09E-02,5.22E-02,0,1.56E-02,2.90E-02,4.46E-02,0,0,8.43E-02,9.39E-02,0,0.191293251,0,0,2.43E-02,0.121978416,0,4.96E-02,0,6.59E-02,3.86E-02,0.156815348,0,2.17E-02,7.53E-02,0,0,7.63E-02,28,34,0,0,0,0,0,0,0,0,0,0.17,NA,NA,1.144298628,-0.244991698,0.277131716,0.359918433,-0.270113667,0.386533568,-0.764118355,7.87E-02,-0.377960293,0.214227381,-0.785101313,5.41E-02,-0.961078663,1.142328869,3244.306649,2922.030539,6166.337188,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 90 %,Positive : 90 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,CK7 - positive; Vimentin - positive;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,228|716,Yes|Yes,Yes|Yes,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,Unknown|Unknown,No|No,Complete Remission|Complete Remission,Complete Remission|Complete 
Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,192.0|680.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01843,C3N-01843,No,b3,9,127N,CPT0098270003,Tumor,No,68,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.4,164,64,23.8,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN1 (FIGO IIIC1),10,Stage III,NA,0.282470823,CNV_L,No,18,MSI-H,MSI-H,681,Mutated,Mutated,WT,Mutated,Mutated,0.223526843,3.01E-02,1.31E-02,0,4.93E-18,5.83E-18,1.80E-02,3.31E-03,1.33E-02,4.66E-02,0,8.84E-03,2.50E-02,1.01E-18,1.21E-02,0,8.36E-19,9.35E-18,3.42E-18,2.66E-02,2.73E-02,2.74E-02,6.44E-03,5.67E-03,1.34E-19,3.79E-18,5.54E-03,5.42E-19,4.95E-18,3.21E-02,7.54E-02,8.41E-03,0,4.49E-02,0.247032703,8.98E-19,5.90E-02,4.67E-18,5.90E-02,0,0,2.72E-02,0.216393599,0,0.128096267,0,8.38E-02,0,0,0,6.90E-02,3.29E-02,4.84E-02,9.27E-02,0.248276041,1.97E-02,0,3.36E-02,0,0,0,43,13,216,0,0,0,0,152,0,0,71,0.51,NA,NA,4.43E-02,0.144663114,-7.23E-02,5.24E-02,-0.547351059,0.534132119,1.47E-02,-0.311919732,0.56525262,-2.62E-02,2.85E-02,0.137575459,0.701965702,0.115626401,3834.743959,5059.640457,8894.384416,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,1,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 20 %,Positive : 20 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,CK7 - positive; Vimentin - patchy positive;,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Current smoker: Includes daily and non-daily smokers,43,NA,10,12.5,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,No,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,201|738,Yes|Yes,No|No,No|No,Tumor Free|With 
Tumor,Never|Never,Never|Never,Never|Never,No|No,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete Remission|Persistent Disease,n/a|n/a,No|Yes,n/a|Distant Metastasis,n/a|Other: Peritoneal carcinomatosis,n/a|706.0,n/a|No,n/a|No,n/a|Yes,n/a|No,165.0|702.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01847,C3N-01847,No,b1,3,130C,CPT0098920003,Tumor,No,57,Female,Endometrioid carcinoma,G1 Well differentiated,1.4,163,116,43.66,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,NA,3.70E-03,CNV_L,No,0.03,MSS,CNV_L,84,WT,Mutated,WT,Mutated,Mutated,7.60E-02,7.94E-03,8.91E-04,0,0,0,2.50E-02,2.35E-04,1.02E-19,1.14E-02,3.11E-19,1.48E-02,2.61E-02,0,2.80E-03,1.53E-02,4.03E-03,1.17E-18,3.06E-18,4.48E-02,5.96E-03,4.44E-03,5.40E-03,4.90E-04,2.19E-18,4.98E-03,3.19E-03,4.29E-04,0,6.23E-04,4.76E-03,5.31E-04,3.54E-18,4.36E-02,7.69E-02,0,1.78E-02,7.63E-03,2.54E-02,2.28E-03,0,4.47E-03,3.78E-02,0,0.280090609,0,1.52E-02,4.91E-02,0,2.66E-02,0,3.17E-04,2.13E-03,3.85E-02,0.309974614,8.53E-02,1.07E-02,7.10E-02,0,0,6.66E-02,26,33,0,0,0,0,20,0,0,0,0,0.24,NA,NA,-0.203958163,0.354768534,0.697546386,0.636621984,-0.105211027,0.664941363,0.597701371,1.192832327,-0.627929428,-0.384998838,0.566847748,0.695395794,-0.358632918,1.260857813,3847.422934,4238.491726,8085.91466,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 80 %,Positive : 80 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,CK7 - positive; Vimentin - positive;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months,Yes,Living,32,No,No,No,Tumor Free,Never,Never,Never,Yes,No,Unknown,No,Complete Remission,Complete Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,11,n/a,n/a,n/a,n/a,n/a
+C3N-01848,C3N-01848,No,b3,12,131C,CPT0099050003,Tumor,No,68,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.3,160,77,30.08,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,NA,0.254035118,CNV_L,No,3.44,MSS,CNV_L,320,WT,Mutated,WT,Mutated,WT,7.70E-02,1.36E-02,0,2.58E-18,0,2.32E-18,8.62E-03,4.51E-19,5.40E-19,0,5.49E-19,2.80E-19,5.58E-03,0,2.69E-03,2.80E-02,1.23E-18,1.04E-02,0,5.98E-02,1.12E-02,1.14E-02,8.93E-03,3.32E-03,0,2.41E-18,4.92E-03,0,7.35E-20,2.34E-02,7.03E-03,2.29E-02,0,3.13E-02,0.131746335,0,2.05E-02,1.92E-02,3.98E-02,1.53E-02,0,0.138733616,8.36E-02,0,0.263437163,0,2.35E-02,1.45E-02,0,1.72E-02,0,0,6.49E-02,5.09E-02,0.285224525,1.38E-02,1.10E-02,0,4.72E-03,8.52E-04,1.25E-02,8,35,113,0,0,0,0,50,0,0,27,0.2,NA,NA,1.139570918,-0.531494785,-0.962600331,2.147275985,-9.99E-02,-0.502091483,-0.188633044,-0.891483434,-0.156879223,0.417927495,-6.95E-02,0.412675045,-0.163980249,-0.799125931,4904.734919,4328.776721,9233.51164,Not Examined,Present,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 15 %,Positive : 15 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,2,CK7 - positive; Vimentin - positive;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,No,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,193|709,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,2|2,No|No,Complete Remission|Complete Remission,Complete Remission|Complete 
Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,172.0|688.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-01871,C3N-01871,No,b4,13,130C,CPT0117010004,Tumor,No,77,Female,Endometrioid carcinoma,G2 Moderately differentiated,2.4,149,80,36,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,NA,1.09E-03,CNV_L,No,10.85,MSI-H,MSI-H,683,WT,Mutated,WT,Mutated,Mutated,6.25E-02,3.82E-03,0,9.99E-19,0,7.50E-19,1.12E-02,0,0,1.26E-02,0,8.23E-03,8.10E-03,0,1.39E-02,2.33E-19,1.07E-19,0,2.79E-18,2.14E-02,3.31E-03,7.85E-03,2.88E-03,3.68E-03,4.70E-18,0,1.15E-03,7.57E-19,2.61E-18,2.08E-03,2.21E-02,7.01E-03,2.44E-18,3.25E-02,0.118236283,8.15E-19,1.65E-02,2.14E-04,1.67E-02,3.86E-02,0,0.100928794,0.135339536,0,0.122688657,5.80E-02,9.27E-02,0,9.91E-03,0,1.77E-02,0,3.71E-02,4.53E-02,0.302402469,1.76E-02,0,0,9.55E-03,2.79E-03,9.40E-03,0,47,294,0,0,0,0,91,0,0,74,0.27,NA,NA,0.302900335,0.516058367,0.292252452,1.083780096,-1.083293559,0.476643996,-0.47464658,0.42683152,-6.25E-02,-0.164209424,-0.388869335,-0.35724918,0.36279121,1.213801812,3831.676349,4112.023105,7943.699453,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Yes,Self Report|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,co-valsacor|kalipoz|Fragmin|Citrafleet|Iporel,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,243|684|963|1412,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,No|No|No|No,3|3|3|3,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,253.0|694.0|973.0|1422.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01873,C3N-01873,No,b4,16,127N,CPT0117230003,Tumor,No,71,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,164,81,30,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,3,Stage I,NA,1.38E-02,CNV_L,No,0.03,MSS,CNV_L,59,WT,Mutated,Mutated,WT,WT,8.33E-02,1.05E-02,1.14E-18,0,5.14E-18,7.84E-03,1.99E-02,0,1.58E-19,0,0,4.95E-03,0,0,0,4.90E-03,4.86E-19,1.28E-18,0,3.43E-03,7.45E-03,1.37E-02,3.29E-03,6.46E-03,0,9.67E-19,4.38E-03,0,7.33E-18,4.05E-02,2.96E-02,1.70E-02,4.03E-18,1.53E-02,2.32E-02,2.23E-03,1.63E-02,2.45E-03,1.87E-02,0,0,0.173787939,0.114182112,0,0.116560122,0,4.27E-02,3.14E-02,0,3.82E-03,4.17E-02,4.45E-02,0.156313502,4.00E-02,0.140850526,0,0,0,8.17E-02,0,1.26E-02,15,18,0,0,0,0,16,0,0,0,0,0.75,NA,NA,0.106481953,0.43067387,-0.9722897,1.06846311,-0.216816855,0.300596224,0.470600851,0.290423913,0.552485217,-0.815837183,0.521103887,-0.243989694,0.206061519,1.169256133,2190.410018,4249.679464,6440.089482,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Amlopin|Euthyrox|Metocard|Lorista H|Fragmin,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,361|606|959|1325,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor 
Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,375.0|620.0|973.0|1339.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01874,C3N-01874,No,b4,16,129C,CPT0117300003,Tumor,No,84,Female,Endometrioid carcinoma,G1 Well differentiated,2.5,158,76,30,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,NA,1.800134566,CNV_L,No,0.12,MSS,CNV_L,58,WT,Mutated,Mutated,Mutated,Mutated,1.52E-02,1.17E-18,2.86E-18,3.41E-18,1.45E-18,0,1.56E-17,0,6.10E-18,1.23E-17,1.05E-18,1.33E-18,0,1.19E-18,1.00E-19,0,0,1.82E-17,0,0,0,7.99E-20,1.07E-19,1.14E-02,0,1.96E-17,3.50E-03,0,6.22E-17,3.22E-02,1.59E-02,1.58E-02,2.29E-18,6.37E-02,2.03E-19,4.50E-19,7.62E-03,9.13E-18,7.62E-03,0,0,7.05E-02,6.17E-02,0,0.142400803,0,9.16E-02,3.51E-02,0,0,2.84E-02,2.52E-02,6.77E-02,0,0.340888389,0,5.24E-02,8.43E-02,0,0,0,13,17,0,0,0,0,24,0,0,0,0,0.92,Yes,No,0.244211136,-1.414841471,-0.925858253,-0.153620361,-0.502347154,-1.089310841,-2.733835426,-0.859543811,-0.500990905,-0.608425221,-2.485675847,-1.325090731,1.86172286,-0.201595836,1021.173704,1322.951831,2344.125535,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Insulatard|Mixtard|Fragmin|nitrendypina|Tarcefoksym|Potassium Chloride,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Deceased,398|817|1027|1027,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,3|3|3|3,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,410.0|829.0|1039.0|n/a,n/a|n/a|n/a|Respiratory Disease,n/a|n/a|n/a|1267.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01875,C3N-01875,No,b1,2,127C,CPT0117370004,Tumor,No,66,Female,Endometrioid carcinoma,G1 Well differentiated,3.5,165,67,24,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,NA,4.90E-02,CNV_L,No,13.51,MSI-H,MSI-H,432,WT,Mutated,WT,Mutated,Mutated,9.06E-02,4.35E-03,3.23E-19,9.90E-18,0,0,0,0,0,8.78E-18,1.24E-18,1.64E-02,6.04E-03,4.24E-18,0,3.92E-18,6.19E-18,1.67E-17,4.29E-18,2.73E-02,4.62E-19,1.29E-03,5.24E-03,4.99E-03,1.80E-19,2.29E-17,2.03E-03,1.90E-18,3.49E-19,1.98E-02,1.10E-02,4.26E-03,1.19E-18,8.04E-02,3.62E-02,7.16E-19,6.23E-03,1.03E-17,6.23E-03,0,0,1.24E-02,0.157910704,0,0.246611062,0,6.92E-02,2.93E-04,0,0,5.50E-02,0.100858138,0,2.02E-02,0.241064221,3.03E-02,3.34E-02,0,6.70E-03,4.79E-03,2.13E-02,5,45,262,0,0,0,0,0,0,0,0,0.82,NA,NA,-1.363689812,-4.89E-02,-1.529508912,-0.553215043,-0.634025083,0.390767965,-0.119760407,-0.111178363,0.344065615,-1.171730701,-7.18E-02,-0.597194535,1.768138045,0.172848677,2183.985888,3574.391886,5758.377774,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Other,left side of uterine cavity,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Self-reported and Medical Record,Yes,Medical Record,citrafleet,NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,48,No,No,No,Tumor Free,Never,Never,Never,No,No,2,No,Complete Remission,Complete Remission,n/a,Unknown,n/a,n/a,n/a,n/a,n/a,n/a,n/a,66,n/a,n/a,n/a,n/a,n/a
+C3N-01876,C3N-01876,No,b4,15,128N,CPT0117440003,Tumor,No,65,Female,Endometrioid carcinoma,G2 Moderately differentiated,6,160,76,29,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,11,Stage I,NA,0,CNV_L,No,30.86,MSI-H,MSI-H,1200,WT,Mutated,Mutated,Mutated,WT,0.121096139,2.94E-03,0,4.68E-18,0,1.44E-18,0,3.85E-03,0,0,1.70E-18,0,9.28E-03,0,4.75E-03,1.14E-18,0,3.16E-18,4.08E-18,0,2.45E-03,8.54E-03,3.41E-03,8.83E-03,0,5.97E-18,0,0,2.59E-17,2.46E-02,5.26E-02,2.27E-02,7.02E-03,0.145036196,0.153952945,3.74E-18,1.26E-02,3.12E-18,1.26E-02,1.21E-02,0,8.37E-02,0.113594656,0,0.170929985,2.97E-02,0.129824121,0,3.77E-02,0,4.75E-02,0,0.122970951,7.81E-02,0.167485468,3.59E-03,0,2.75E-03,0,0,0,4,128,256,0,0,0,257,110,119,0,0,0.66,NA,NA,0.559073275,0.680713095,-0.752155232,0.647809148,-0.702791641,0.933029185,-0.500273297,0.931540716,1.020084157,-0.395834595,-0.375792936,-0.910458356,0.450531888,0.578826112,2723.908546,4237.496025,6961.404571,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,0,0,3,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Lokren|Vanatex|Lexotan|Acard|fragmin|Fortrans,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,392|683|1071|1509,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,409.0|700.0|1088.0|1526.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-01877,C3N-01877,No,b2,8,127N,CPT0117510003,Tumor,No,73,Female,Endometrioid carcinoma,G2 Moderately differentiated,8,163,100,37,Present,=50% myometrial invasion,Seventh Edition (2010),pT3b (FIGO IIIB),pN2 (FIGO IIIC2),17,Stage III,NA,5.61E-03,CNV_L,No,11.86,MSI-H,MSI-H,726,WT,Mutated,WT,Mutated,Mutated,0.177404359,3.49E-02,7.77E-04,0,4.02E-18,5.17E-19,6.30E-02,0,1.63E-02,1.90E-02,1.99E-18,3.61E-03,1.62E-02,5.18E-19,7.60E-03,2.04E-02,1.97E-03,4.46E-02,2.95E-18,6.93E-02,1.09E-02,9.92E-03,1.08E-02,1.00E-02,5.72E-04,3.74E-03,7.22E-03,6.81E-20,1.08E-19,3.98E-02,2.16E-02,2.29E-02,6.13E-20,6.77E-02,4.62E-02,6.13E-03,5.70E-02,3.42E-02,9.11E-02,0,2.63E-02,0.114976296,5.65E-02,0,0.298018221,0,5.30E-02,0,0,4.07E-03,8.95E-03,8.79E-03,4.93E-02,7.36E-02,0.172018072,6.28E-02,0,6.47E-02,0,0,6.91E-03,10,145,147,0,0,0,127,114,0,0,0,0.17,NA,NA,-3.17E-02,1.77E-02,3.37E-02,-0.659912822,-1.29076585,-0.103905023,7.33E-02,-0.816857239,-0.29263442,0.376151556,-0.11418628,0.238528748,-0.47811667,0.441443599,4764.458571,5173.015659,9937.47423,Not Examined,Not identified,Margin(s) involved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,4,4,5,1,1,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Current smoker: Includes daily and non-daily smokers,Unknown,NA,20,NA,Yes,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Atrovent|Flutixon|Enarenal|Zafiron|fragmin|Cipronex|Metronidazol|Fortrans,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Deceased,397|678|678,No|No|No,Yes|Yes|Yes,No|No|No,With Tumor|With Tumor|With Tumor,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,No|No|No,3|3|3,No|No|No,Persistent Disease|Persistent Disease|Patient Deceased,Persistent Disease|Persistent Disease|Patient Deceased,n/a|n/a|n/a,Yes|Yes|No,Distant Metastasis|Distant Metastasis|n/a,Other: vaginal cuff and paraortic lymph nodes|Other: rectum and paraortal lymph nodes|n/a,195.0|604.0|n/a,No|No|n/a,No|No|n/a,Yes|Yes|n/a,No|No|n/a,412.0|693.0|n/a,n/a|n/a|Uterine Corpus Cancer,n/a|n/a|912.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-01878,C3N-01878,No,b2,5,129C,CPT0117560003,Tumor,No,59,Female,Endometrioid carcinoma,G2 Moderately differentiated,10.5,170,106,36,Present,=50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN2 (FIGO IIIC2),6,Stage II,NA,0.6351284,CNV_L,No,20.15,MSI-H,MSI-H,837,WT,Mutated,WT,Mutated,Mutated,0.199708337,2.28E-02,8.86E-03,0,1.19E-18,0,5.52E-02,8.07E-21,3.10E-04,4.05E-02,0,4.42E-02,1.95E-02,1.98E-18,2.88E-02,4.75E-19,1.20E-18,0,1.93E-18,7.79E-03,2.89E-02,2.82E-02,5.85E-03,5.89E-04,0,2.20E-03,3.47E-18,0,2.38E-18,1.85E-02,6.86E-02,1.76E-02,5.34E-18,4.04E-02,0.151555088,3.31E-02,5.57E-02,2.37E-19,5.57E-02,7.27E-03,0,1.94E-02,0.125281925,0,0.214293137,0,3.11E-02,2.34E-02,0,0,8.62E-02,1.21E-02,0.160409928,4.09E-02,0.244346272,1.44E-02,0,0,1.61E-02,4.78E-03,0,25,74,285,0,0,0,0,203,0,0,0,0.55,Yes,No,-0.181577593,1.397289792,0.635311483,1.162459583,-0.616480681,0.98083274,0.669498781,0.829350251,0.762932699,0.239677216,0.519901832,1.391960865,0.982448781,-0.881420623,5148.669003,5726.274774,10874.94378,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,IHC staining not done,1,24,IHC staining not done,1,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,"ER(+), PR(+), p53(-), panCK, CK7, p16, CK5/6",NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self-reported and Medical Record|Self-reported and Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record,Bisopromerck|metformax,NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,162,Yes,Yes,No,Tumor 
Free,Never,Never,Never,Yes,Yes,2,No,Complete Remission,Complete Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,188,n/a,n/a,n/a,n/a,n/a
+C3N-01879,C3N-01879,No,b2,8,127C,CPT0226860003,Tumor,No,67,Female,Endometrioid carcinoma,G1 Well differentiated,4,164,104,38,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,7,Stage I,NA,0,CNV_L,Yes,7.00E-02,MSS,POLE,2239,Mutated,Mutated,WT,Mutated,Mutated,0.137455678,4.02E-02,1.41E-02,3.66E-18,9.86E-20,0,4.28E-02,0,7.38E-03,6.30E-02,9.25E-18,7.35E-03,1.82E-02,0,8.02E-03,1.48E-03,0,9.46E-18,0,1.38E-02,3.08E-02,2.14E-02,1.09E-02,6.56E-06,3.93E-03,1.22E-02,1.30E-02,4.87E-03,3.27E-18,1.98E-02,6.48E-02,2.11E-02,0,3.09E-02,4.58E-02,2.86E-02,6.91E-02,7.39E-04,6.98E-02,1.35E-02,0,2.50E-02,7.75E-02,0,0.227319109,9.34E-02,6.00E-02,0,0,0,1.57E-02,0,0.173488158,0.106049337,0.129079703,7.92E-03,0,5.35E-03,0,0,6.57E-02,169,126,0,0,492,1440,0,0,0,0,0,0.55,NA,NA,-0.152512497,1.429161576,1.091894133,3.201166668,-6.38E-02,1.536337968,1.483652505,-0.92167649,0.836126569,-0.909618405,1.498413183,0.17598968,0.142423981,1.59541048,4156.428126,5829.00019,9985.428316,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self Report|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Euthyrox|Atoris|Aldan,NA,NA,NA,NA,Yes,1,12 Months,Yes,Living,489,No,No,No,Tumor Free,Never,Never,Never,Yes,No,2,No,Complete Remission,Unknown,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,489,n/a,n/a,n/a,n/a,n/a
+C3N-01880,C3N-01880,No,b4,14,131N,CPT0244510003,Tumor,No,68,Female,Endometrioid carcinoma,G2 Moderately differentiated,6,157,76,30,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,13,Stage I,NA,5.74E-03,CNV_L,No,2.46,MSS,CNV_L,169,WT,Mutated,WT,WT,Mutated,0.174442329,3.33E-02,1.16E-02,5.53E-18,0,1.28E-18,3.50E-02,9.48E-04,1.97E-02,2.47E-02,4.55E-18,5.53E-03,1.97E-02,6.66E-03,1.89E-02,1.03E-02,0,3.13E-18,0,5.43E-02,3.95E-02,2.55E-02,2.40E-02,1.30E-02,1.30E-19,3.38E-18,1.06E-02,0,0,1.46E-02,3.16E-02,1.82E-02,0,3.81E-02,3.68E-02,2.32E-02,8.29E-02,5.16E-03,8.81E-02,6.19E-03,0,6.65E-02,6.73E-02,0,0.242474543,0,3.09E-02,0,1.56E-02,0,1.82E-02,0,9.68E-02,0.105584982,0.275675943,5.71E-03,0,6.92E-02,0,0,0,26,43,0,0,0,0,55,0,0,0,0,0.56,NA,NA,0.70910867,-1.400986602,2.046975559,2.73E-02,-2.65E-03,-1.50845047,0.203122598,3.34E-02,-0.623237411,-0.769186317,0.191999416,0.755493383,-0.607070106,-0.259883536,4534.7251,5118.019807,9652.744907,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,0,0,5,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record,Yes,Medical Record|Medical Record,Imovate|Vit B2,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,411|745|1074|1531,No|No|No|Yes,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,No|No|No|No,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete 
Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,411.0|745.0|1074.0|1531.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-02012,C3N-02012,No,b1,4,130N,CPT0115470003,Tumor,No,69,Female,Endometrioid carcinoma,G2 Moderately differentiated,3,173,126,42.11,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,30,Stage I,White,0,CNV_L,No,0.13,MSS,CNV_L,39,WT,Mutated,Mutated,Mutated,WT,0.10837779,2.33E-02,0,0,0,0,2.77E-02,0,3.69E-19,1.66E-02,4.50E-19,1.87E-19,6.36E-03,0,3.77E-03,5.12E-03,0,0,5.32E-19,3.07E-19,1.03E-02,1.17E-02,5.31E-03,4.45E-03,0,0,4.26E-03,4.59E-19,0,2.99E-02,2.66E-02,1.32E-02,0,9.35E-02,0.104035137,3.39E-03,2.79E-02,2.56E-03,3.04E-02,2.38E-03,6.60E-03,0.100506775,2.23E-02,0,0.193876697,0,0.114171638,1.65E-02,8.98E-04,0,0.103761073,3.46E-02,6.48E-02,2.79E-02,0.182560871,8.04E-03,2.33E-02,0,4.64E-02,3.03E-03,4.85E-02,9,24,0,0,0,0,0,0,0,0,0,0.31,NA,NA,-0.530721444,1.171436913,-0.331317798,0.100828289,0.158560762,0.533353021,1.460192614,-0.436046742,0.655455465,-0.683019213,1.427569954,-0.314388587,0.463815547,-9.64E-02,3396.330521,4983.009654,8379.340175,Atypical and/or suspicious,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,white,United States,Other,Involves anterior and posterior,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record,aspirin|lipitor|temovate|docusate|hydroxychloroquine|levothyroxine|metaprolol,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,398|762|1126|1490,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Unknown|Unknown|Unknown|Unknown,Unknown|Unknown|Unknown|Unknown,Yes|Yes|Yes|Yes,No|No|No|No,1|1|1|1,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,378.0|742.0|1106.0|1470.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-02027,C3N-02027,No,b2,7,127N,CPT0133400008,Tumor,No,55,Female,Endometrioid carcinoma,G3 Poorly differentiated,1.5,161,62,23.92,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,10,Stage I,NA,0.836262984,CNV_L,No,29.19,MSI-H,MSI-H,1588,Mutated,Mutated,WT,Mutated,Mutated,0.13465926,1.32E-02,6.68E-03,1.39E-18,0,2.20E-18,1.18E-02,1.20E-02,0,1.22E-03,1.57E-18,3.73E-03,2.35E-02,3.44E-19,3.51E-03,0,0,0,0,2.73E-03,2.85E-02,2.99E-02,6.40E-03,3.08E-03,0,0,2.52E-03,0,4.50E-18,1.53E-02,5.43E-02,3.77E-03,0,9.58E-02,0.191302974,4.18E-18,3.22E-02,0,3.22E-02,1.70E-02,0,2.71E-02,4.57E-02,0,0.236501993,3.10E-02,5.83E-02,0,1.41E-02,0,4.11E-02,2.91E-03,0.120051349,0.191009379,0.194287952,0,0,9.09E-03,0,0,1.19E-02,281,255,0,0,0,0,406,263,0,0,0,0.22,NA,NA,-1.620645881,1.030313371,-1.867447847,1.518661251,-1.110265741,0.865446114,0.804264842,-0.359433287,1.926234681,0.573562232,0.55489661,-1.086838292,4.40E-02,-0.319440063,3784.602642,4222.588922,8007.191565,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 80 %,Negative,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,CK7 - positive; Vimentin - patchy positive; ER - 80% (+++); PR - negative;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,498|874,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,No|No,No|No,Unknown|Unknown,No|No,Complete 
Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,440.0|816.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-02028,C3N-02028,No,b1,3,128N,CPT0133600003,Tumor,No,68,Female,Endometrioid carcinoma,G3 Poorly differentiated,1.3,160,103,40.23,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,10,Stage I,NA,0.94592098,CNV_L,No,16.98,MSI-H,MSI-H,854,WT,Mutated,WT,Mutated,Mutated,0.137349875,6.08E-03,1.45E-02,8.40E-19,2.55E-18,0,1.54E-03,1.23E-02,4.24E-02,4.63E-02,0,0,3.00E-02,1.67E-18,1.97E-02,2.67E-02,0,8.72E-02,3.34E-18,0.171591381,1.98E-02,1.96E-02,5.55E-03,3.66E-03,3.12E-18,0,7.02E-03,3.97E-19,5.56E-18,8.39E-19,4.58E-02,9.70E-03,0,6.20E-03,0.29710689,2.24E-19,6.11E-02,6.08E-02,0.121915215,1.82E-02,0,2.64E-02,0.155186169,0,0.173452888,6.00E-02,6.69E-02,0,1.17E-02,0,2.01E-02,0,0.110151944,0.100243419,0.165907003,0,0,0,6.10E-02,1.98E-02,1.10E-02,0,51,377,0,0,0,0,109,0,0,93,0.16,NA,NA,0.650007282,-8.93E-02,-0.188888952,2.38E-02,-8.43E-02,0.377410318,0.783005019,0.421413623,0.896518726,0.700052389,0.673144646,1.335396533,-1.52790344,0.469251306,6118.023607,5850.288699,11968.31231,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 60 %,Positive : 20 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,CK7 - positive; Vimentin - patchy positive; ER - 60%; PR - 20%;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02030,C3N-02030,No,b1,4,127C,CPT0133770008,Tumor,No,62,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.4,163,131,49.31,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pNX,0,Stage I,NA,0,CNV_L,No,12.53,MSI-H,MSI-H,509,WT,Mutated,Mutated,Mutated,WT,2.01E-18,1.47E-19,0,7.53E-18,2.96E-18,0,0,2.19E-19,3.11E-19,5.32E-18,0,0,5.55E-03,0,7.17E-19,1.19E-18,1.21E-18,2.32E-02,0,1.48E-04,0,6.33E-19,0,2.21E-02,0,1.63E-18,6.42E-03,0,0,4.27E-02,3.72E-03,1.38E-02,0,3.09E-02,0.106855103,2.14E-19,1.47E-02,1.37E-02,2.85E-02,1.84E-02,0,0.100547164,5.72E-03,0,0.174504876,0,0,6.92E-02,0,9.15E-02,0,0,0.326854429,0,8.54E-02,0,3.66E-02,0,9.12E-02,0,0,24,64,187,0,0,0,0,54,0,0,50,0.66,NA,NA,1.156356082,1.50100684,0.652103028,1.076495637,-2.130746153,1.268860898,0.252439451,0.981173532,0.283160486,1.172658794,0.448894477,1.278679606,-0.978229726,1.82680905,3771.888795,2333.565376,6105.454171,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,0,0,cM0,R0: No residual tumor,Positive : 70 %,Positive : 60 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,CK7 - positive; Vimentin - patchy positive; ER - 70%; PR - 60%,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,270|808,Yes|Yes,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|No,Unknown|Unknown,No|No,Complete Remission|Complete Remission,Complete Remission|Complete 
Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,284.0|822.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-02061,C3N-02061,No,b1,2,129C,CPT0134310008,Tumor,No,67,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.7,150,92,40.89,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,10,Stage I,NA,6.67E-03,CNV_L,No,9.86,MSI-H,MSI-H,461,WT,Mutated,WT,WT,Mutated,0.157836188,1.89E-02,1.43E-02,0,1.80E-19,2.43E-18,2.64E-03,9.82E-03,2.36E-02,1.44E-02,0,0,2.41E-02,1.64E-21,7.12E-03,1.83E-03,7.89E-19,8.53E-02,0,9.94E-02,2.51E-02,1.67E-02,2.32E-02,6.63E-03,5.50E-20,6.31E-19,1.09E-03,5.16E-19,1.74E-18,1.28E-02,3.10E-02,6.42E-03,0,1.04E-02,0.236886499,1.24E-02,5.42E-02,4.45E-02,9.87E-02,3.31E-02,0,2.37E-02,0.124598703,0,0.136362416,0,8.90E-02,0,0,0,4.00E-02,4.89E-02,1.00E-02,0.104041445,0.285313298,4.09E-02,0,5.32E-02,0,1.08E-02,0,57,92,106,0,0,0,92,0,0,0,0,0.46,NA,NA,-0.728045563,-0.762015393,1.433484996,-1.264967724,0.558911407,-0.579144556,-0.84279916,-0.800823869,-0.694694164,0.421685753,-1.117976774,0.161634387,-0.155458633,-0.233454158,5779.035657,5011.711958,10790.74761,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Endometrium,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 90 %,Positive : 75 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,CK7 - positive; Vimentin - positive;,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months,Yes,Living,54,No,No,No,Tumor Free,Never,Never,Never,Yes,No,Unknown,No,Complete Remission,Complete Remission,n/a,No,n/a,n/a,n/a,n/a,n/a,n/a,n/a,17,n/a,n/a,n/a,n/a,n/a
+C3N-02062,C3N-02062,No,b1,1,127C,CPT0134500003,Tumor,No,67,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.3,160,83,32.42,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,NA,0,CNV_L,No,7.00E-02,MSS,CNV_L,73,Mutated,Mutated,WT,Mutated,WT,1.17E-17,1.49E-18,2.43E-03,0,0,0,0,1.08E-02,4.99E-19,5.69E-03,0,9.25E-19,2.89E-02,0,2.42E-03,2.39E-18,0,0,3.02E-17,8.12E-02,2.69E-18,0,9.24E-04,6.94E-03,1.81E-18,0,4.98E-05,0,7.50E-18,4.26E-03,1.72E-02,6.72E-03,2.10E-17,9.40E-02,0.155266913,2.80E-18,6.24E-03,1.22E-18,6.24E-03,0,0,5.31E-02,0,0,0.391955364,0,0,0,0,0,1.96E-02,2.33E-03,0.228128164,5.70E-03,0.246055203,0,1.44E-02,0,1.22E-02,2.65E-02,0,30,36,0,0,0,0,0,0,0,0,0,0.81,NA,NA,-1.698677482,-2.136668882,-0.630072864,0.691906107,-1.301949188,-1.120022429,-1.170332152,-0.782234354,6.70E-02,-1.006483662,-0.921356697,-1.658597955,1.560987255,-0.375566557,1892.523803,1697.25622,3589.780024,Not Examined,Present,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 70 %,Positive : 60 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,CK7 - positive; Vimentin - positive; Estrogen - 70% (+++); Progesterone - 60% (++);,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02244,C3N-02244,No,b1,2,131C,CPT0128960003,Tumor,No,62,Female,Endometrioid carcinoma,G2 Moderately differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Not Reported,0,CNV_L,No,0,MSS,CNV_L,48,WT,Mutated,WT,Mutated,WT,9.60E-02,3.63E-02,7.51E-20,2.84E-19,0,2.01E-02,2.74E-02,0,1.04E-03,1.25E-02,0,2.80E-02,8.75E-20,0,1.08E-02,1.90E-02,6.24E-03,8.89E-02,6.45E-19,4.95E-02,2.25E-03,3.05E-03,7.21E-03,5.95E-03,0,0,4.43E-03,2.45E-20,3.38E-19,4.59E-02,1.40E-02,1.04E-02,2.64E-18,3.48E-02,1.44E-04,0,4.17E-02,5.39E-02,9.57E-02,0,5.09E-02,0.122627402,0.109951525,0,0.185850046,0,2.91E-02,1.98E-02,0,0,8.04E-02,5.54E-04,2.79E-02,7.51E-03,0.222394331,6.12E-02,5.95E-03,5.61E-02,0,0,1.97E-02,19,23,0,0,0,0,0,0,0,0,0,0.18,NA,NA,-0.107676348,9.76E-02,0.835401632,-1.123126051,0.169083077,-0.558616744,0.477976782,1.10908511,-0.128160498,0.58329071,0.591609536,1.259272706,-0.747330076,0.646962999,4754.892256,5088.957186,9843.849442,NA,NA,NA,Hispanic or Latino,Hispanic or Latino,Other: unknown,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02247,C3N-02247,No,b1,3,131N,CPT0265300003,Tumor,No,68,Female,Endometrioid carcinoma,G2 Moderately differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Unknown,NA,NA,No,7.93,MSI-H,MSI-H,728,WT,Mutated,WT,Mutated,WT,0.253732935,1.87E-02,3.08E-02,0,1.20E-03,4.99E-18,0.103786136,1.89E-03,4.08E-02,7.66E-02,3.74E-20,1.77E-02,2.23E-02,0,2.66E-02,1.26E-02,7.65E-19,0.157565663,0,0.143572972,3.21E-03,4.38E-03,1.46E-02,6.88E-03,0,0,3.10E-03,1.65E-20,3.06E-18,4.78E-03,1.20E-02,1.80E-02,2.73E-18,1.41E-17,0.158750667,3.25E-03,6.49E-02,8.67E-02,0.151643366,0,1.51E-02,1.03E-02,5.37E-02,0,0.31424386,0,0,0,0.165699366,0,0,0,0,0.109844507,0.209919838,6.39E-02,0,5.74E-02,0,0,0,9,64,316,0,0,0,0,92,0,0,72,NA,NA,NA,0.842084468,-1.627463096,-0.356452521,-1.418094345,0.396804101,-1.283228813,-0.169078372,-1.152918335,-0.350436708,-9.69E-02,-0.381885703,1.097287551,-0.5286894,5.67E-02,5290.709291,5725.192064,11015.90136,NA,NA,NA,Hispanic or Latino,Hispanic or Latino,Other: unknown,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02249,C3N-02249,No,b3,10,130N,CPT0129080003,Tumor,No,64,Female,Endometrioid carcinoma,G2 Moderately differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,White,0,CNV_L,No,0.06,MSS,CNV_L,63,WT,Mutated,Mutated,Mutated,Mutated,5.57E-02,0,5.59E-19,0,1.39E-18,6.01E-18,1.15E-17,4.21E-19,0,0,8.26E-19,4.56E-18,6.59E-03,0,2.06E-19,5.80E-03,3.39E-18,3.39E-03,3.44E-18,4.51E-02,0,4.97E-03,3.63E-19,0,2.93E-18,1.10E-17,9.93E-19,2.57E-18,7.36E-18,2.96E-02,2.86E-03,7.70E-03,1.17E-18,6.30E-02,4.14E-03,0,1.73E-17,5.23E-03,5.23E-03,0,0,6.12E-02,3.15E-03,0,0.324678703,0,0,0,0,4.20E-02,0,0,0.353641598,6.00E-04,0.155729002,0,3.77E-02,0,1.90E-02,0,2.27E-03,2,24,33,0,0,0,0,0,0,0,0,0.74,NA,NA,-2.45E-02,1.129819654,0.518511803,3.22E-02,-0.524796529,1.285313111,0.188739199,-3.77E-02,-3.73E-02,0.555652879,0.529770126,-1.208466992,0.666095782,1.677119566,3546.884365,3299.669416,6846.553781,NA,NA,NA,Hispanic or Latino,Hispanic or Latino,Other: unknown,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02253,C3N-02253,No,b4,14,128N,CPT0128770003,Tumor,No,53,Female,Endometrioid carcinoma,G2 Moderately differentiated,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,White,0,CNV_L,Yes,0.15,MSS,POLE,4219,Mutated,Mutated,WT,Mutated,Mutated,0.128769896,2.68E-02,0,2.00E-18,0,3.01E-03,5.52E-02,9.37E-04,6.90E-04,1.47E-02,5.11E-18,1.32E-19,1.97E-18,1.02E-18,2.87E-03,4.53E-18,3.21E-18,1.90E-18,7.86E-19,1.08E-02,3.79E-03,9.47E-03,6.78E-03,5.26E-03,0,0,6.26E-03,3.52E-19,1.33E-17,6.69E-02,2.84E-02,2.27E-02,0,0.100519471,1.63E-02,4.28E-03,2.63E-02,3.22E-18,2.63E-02,2.14E-02,0,5.23E-02,0.110794303,0,0.185194989,0,0.109066725,2.28E-02,0,6.96E-03,3.95E-02,1.94E-02,8.68E-02,8.13E-02,0.156586588,0,0,0.105098029,0,2.63E-03,0,224,117,0,0,1999,1869,0,0,0,0,0,0.76,NA,NA,-0.572640528,-0.221503713,-0.785765328,-0.165927255,1.69308959,-0.224887438,-0.193014583,-1.434108645,-7.85E-02,-1.141704929,-0.248593204,-0.941148449,1.025463306,0.17829777,2333.912514,4295.831047,6629.743562,NA,NA,NA,Not-Hispanic or Latino,Non-Hispanic,Other: unknown,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02271,C3N-02271,No,b1,3,128C,CPT0181670008,Tumor,No,58,Female,Endometrioid carcinoma,G2 Moderately differentiated,0.7,158,121,48,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,4,Stage I,NA,0,CNV_L,No,0,MSS,CNV_L,31,WT,Mutated,WT,WT,Mutated,6.48E-02,1.68E-02,0,0,0,4.30E-18,3.30E-02,0,0,1.05E-02,5.00E-18,3.08E-02,3.14E-03,1.34E-18,9.52E-03,2.47E-03,1.52E-21,9.94E-18,5.68E-18,2.76E-02,5.43E-03,1.42E-03,7.09E-03,1.03E-02,3.11E-18,7.62E-03,2.38E-03,1.62E-02,5.29E-18,6.52E-18,3.00E-02,2.40E-03,0,4.60E-02,2.96E-02,1.06E-03,4.39E-02,1.23E-03,4.52E-02,3.64E-02,0,5.67E-02,1.36E-02,0,0.276595024,0,1.41E-03,2.26E-02,0,0,8.68E-02,0,6.51E-02,1.72E-02,0.201748072,0,2.38E-02,0,1.79E-02,2.63E-02,0.153909886,9,21,0,0,0,0,0,0,0,0,0,0.25,Yes,No,-1.51E-04,0.747834004,1.239876315,0.394066094,0.997235066,0.80395918,1.218001135,1.504681034,1.20863107,-0.658134259,1.146738531,0.761140661,-0.58700222,1.159577578,3984.425899,5296.834209,9281.260107,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,4 or more,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Self-reported and Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Formetic|Vanatex|Adipine|Citrafleet,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,320|700|1063|1328,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,4 or more|4 or more|4 or more|4 or more,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,335.0|715.0|1078.0|1343.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-02274,C3N-02274,No,b2,5,128C,CPT0182380004,Tumor,No,80,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,158,84,33,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,7,Stage I,NA,0.161135217,CNV_L,No,38.49,MSI-H,MSI-H,1368,WT,Mutated,WT,Mutated,WT,0,0,0,2.26E-18,0,8.38E-18,1.80E-17,0,0,4.24E-18,3.17E-18,0,2.11E-02,0,1.72E-18,2.30E-03,1.68E-18,5.28E-18,0,1.73E-21,0,0,1.83E-04,1.30E-04,0,0,1.26E-18,1.01E-18,4.26E-17,2.47E-03,9.03E-03,2.12E-02,1.98E-20,5.46E-02,0.129406267,1.74E-17,8.64E-05,1.15E-03,1.24E-03,1.09E-02,0,0,0,0,0.231365004,0,1.11E-02,2.09E-02,0,5.55E-02,0,0,8.53E-02,2.12E-03,0.138203005,0.135116782,5.15E-02,0,0.161245324,5.50E-03,9.12E-02,0,108,425,0,0,0,0,248,0,0,208,0.6,Yes,No,-0.960910833,-0.35442055,-1.037890741,1.675003725,-1.144594546,7.85E-02,-3.47E-03,-0.563620465,0.314217582,-0.942210883,0.202177414,-1.276107442,1.452148315,-1.168394001,1718.934353,2235.34997,3954.284324,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Lokren|Siofor|Diured|Simvasterol|Lacipil|Vivace,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Deceased,316|615|977|971,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,With Tumor|Tumor Free|Unknown Tumor Status|Unknown 
Tumor Status,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Persistent Disease|Complete Remission|Unknown|n/a,n/a|n/a|n/a|Unknown,Yes|Yes|Yes|Yes,Distant Metastasis|Distant Metastasis|Distant Metastasis|Distant Metastasis,Brain|Brain|Brain|Brain,258.0|258.0|258.0|279.0,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,No|No|No|No,No|No|No|No,316.0|615.0|977.0|n/a,n/a|n/a|n/a|Cerebrovascular Disorder,n/a|n/a|n/a|1102.0,279.0|279.0|279.0|279.0,Surgical Resection|Surgical Resection|Surgical Resection|Unknown,RX: Presence of Residual Tumor cannot be assessed|RX: Presence of Residual Tumor cannot be assessed|RX: Presence of Residual Tumor cannot be assessed|RX: Presence of Residual Tumor cannot be assessed
+C3N-02296,C3N-02296,No,b3,9,129C,CPT0181750004,Tumor,No,83,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.7,150,90,40,Present,=50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pNX,0,Stage II,NA,0,CNV_L,No,17.56,MSI-H,MSI-H,514,WT,Mutated,WT,WT,WT,2.78E-02,5.13E-03,6.67E-20,0,7.87E-18,1.68E-03,6.63E-03,0,1.17E-18,3.05E-18,0,3.98E-19,0,0,5.99E-04,3.51E-19,3.03E-05,1.73E-04,0,1.84E-02,0,0,4.99E-04,1.38E-02,2.39E-20,0,7.49E-03,0,2.32E-19,6.17E-02,1.20E-02,7.64E-03,0,5.47E-02,6.49E-04,8.83E-03,1.30E-02,8.64E-05,1.31E-02,1.87E-02,0,2.57E-03,1.62E-02,0,0.181637678,0,4.45E-02,5.99E-02,0,0,5.67E-02,0,0.402025819,7.70E-03,0.124040119,0,0,0,7.88E-02,0,7.09E-03,37,100,172,0,0,0,0,79,0,0,0,0.85,Yes,No,1.31E-02,-0.317137185,-0.312541962,1.433738033,-0.161056638,-0.797784762,5.44E-02,-0.704653894,1.024086467,-0.80926426,0.234668224,0.606228964,1.094910038,-0.780992912,2602.18226,3024.123206,5626.305465,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Euthyrox|Siofor|Diuresin|Lisiprol|Simvacard,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 
Months,No|No|No|No,Living|Living|Living|Deceased,448|750|974|974,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|With Tumor|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Persistent Disease|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|Yes|Yes|Yes,n/a|Locoregional Recurrence|Locoregional Recurrence|Locoregional Recurrence,n/a|Other: vaginal cuff|Other: vaginal cuff|Other: vaginal cuff,n/a|750.0|750.0|750.0,n/a|No|Yes|Yes,n/a|No|No|No,n/a|No|No|No,n/a|No|No|No,448.0|750.0|974.0|n/a,n/a|n/a|n/a|Respiratory Disease,n/a|n/a|n/a|1268.0,n/a|n/a|817.0|817.0,n/a|n/a|Surgical Resection|Surgical Resection,n/a|n/a|R0: No Residual Tumor|R0: No Residual Tumor
+C3N-02298,C3N-02298,No,b3,10,131C,CPT0181540008,Tumor,No,83,Female,Endometrioid carcinoma,G1 Well differentiated,4,160,75,29,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,4,Stage I,NA,NA,NA,No,0,MSS,NA,56,WT,Mutated,WT,WT,WT,3.10E-02,2.66E-02,0,0,2.64E-18,0,8.37E-03,1.35E-18,0,2.79E-02,0,1.35E-02,1.45E-02,0,2.58E-03,6.83E-19,0,9.93E-18,0,5.07E-02,5.67E-04,2.74E-19,6.28E-03,8.96E-03,6.38E-19,5.79E-19,1.08E-02,2.65E-19,3.22E-18,1.09E-17,1.69E-02,1.28E-02,6.99E-20,0,2.24E-02,0,2.58E-02,5.31E-18,2.58E-02,4.38E-02,0,4.85E-03,0.141005068,0,0.172691468,0,0,4.35E-02,1.43E-02,0,0,0,0,1.29E-02,0.280991343,1.14E-02,1.50E-02,0.215700909,0,0,4.40E-02,14,34,0,0,0,0,0,0,0,0,0,NA,NA,NA,0.559241776,-0.540152595,0.970245124,1.000307016,-0.780444474,0.369785978,0.174319739,0.76700818,1.01E-02,-1.335195364,0.182926199,-0.703083495,1.340662281,0.239093334,3230.255382,3577.007665,6807.263047,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,clexane|Nitrendypina|Betaloc|Atoris|Nolpaza|Doxar|Tritace|Heparegen,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,378|625|987|1450,No|No|No|No,No|No|No|No,No|No|No|No,Tumor 
Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,No|No|No|No,No|No|No|No,2|2|2|2,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,378.0|625.0|987.0|1450.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-02302,C3N-02302,No,b3,11,131C,CPT0181610003,Tumor,No,69,Female,Endometrioid carcinoma,G1 Well differentiated,6.5,160,78,30,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,16,Stage I,NA,2.12E-02,CNV_L,No,0.01,MSS,CNV_L,76,WT,Mutated,Mutated,Mutated,Mutated,5.97E-02,5.82E-03,0,0,0,0,1.93E-18,8.94E-20,0,9.27E-03,3.07E-18,0,4.21E-03,4.50E-20,9.11E-03,0,1.87E-03,1.85E-19,0,8.57E-02,1.96E-02,1.74E-02,1.18E-02,2.56E-03,0,1.35E-02,5.54E-03,6.12E-03,0,2.21E-02,1.30E-02,6.88E-03,2.94E-19,4.81E-02,6.02E-02,6.01E-03,3.90E-02,2.60E-19,3.90E-02,0,0,0.111216784,0.120963476,0,0.172160335,9.29E-03,6.11E-03,0,0,2.73E-02,0,0,6.57E-02,1.58E-03,0.323537185,2.67E-02,7.57E-03,0,0,3.96E-02,8.83E-02,40,33,0,0,0,0,0,0,0,0,0,0.65,Yes,No,0.865147527,-0.131179063,-0.213954775,-0.478838545,-0.369031154,0.146959813,0.585819275,0.688512187,0.315352122,-0.259014447,0.726497766,1.127427742,-0.33141093,0.709827011,3872.297694,4392.003162,8264.300857,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Ebivol|Diuresin|Karbis|Glucophage 750g,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,503|746|993|1357,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,Yes|Yes|Yes|Yes,Yes|Yes|Yes|Yes,3|3|3|3,No|No|No|No,Complete Remission|Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,503.0|746.0|993.0|1357.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-02337,C3N-02337,No,b3,12,128C,CPT0136280003,Tumor,No,70,Female,Endometrioid carcinoma,G2 Moderately differentiated,0.8,164,81,30.12,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,10,Stage I,NA,0,CNV_L,No,0.04,MSS,CNV_L,57,WT,Mutated,WT,Mutated,WT,3.95E-02,4.23E-03,1.92E-19,0,0,1.19E-02,1.99E-18,3.58E-19,4.59E-03,8.66E-03,0,7.50E-03,0,0,2.90E-04,9.72E-03,0,8.86E-19,3.76E-18,4.45E-02,0,0,2.98E-04,2.15E-02,8.04E-20,6.88E-19,7.69E-03,0,3.32E-18,3.83E-03,1.70E-02,1.11E-02,0,2.57E-02,2.25E-02,1.19E-02,2.04E-02,4.86E-03,2.53E-02,0,2.42E-02,0,0,0,0.378661096,0,5.29E-02,2.82E-02,0,3.70E-02,7.47E-02,3.59E-02,0.184791056,2.53E-02,0.146671385,0,1.13E-02,5.60E-04,0,0,0,16,20,0,0,0,0,16,0,0,0,0,0.34,NA,NA,0.634611046,-0.706436551,-0.655793985,-0.747967565,-0.569300874,-0.398115283,-1.97E-02,-0.318297464,0.822338019,-0.38505721,-3.84E-02,-1.81E-02,-0.534978146,0.156357041,3157.377031,3626.221991,6783.599023,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02372,C3N-02372,No,b2,7,131N,CPT0137140008,Tumor,No,54,Female,Endometrioid carcinoma,G2 Moderately differentiated,1.3,173,90,30.07,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,10,Stage I,NA,6.32E-02,CNV_L,No,22.4,MSI-H,MSI-H,1310,WT,Mutated,WT,Mutated,WT,0.178237985,9.34E-03,2.67E-02,2.06E-19,3.95E-03,0,4.67E-02,1.07E-02,3.51E-02,5.21E-02,4.64E-03,2.84E-03,2.60E-02,1.55E-20,4.54E-02,3.86E-02,9.84E-03,0.141676588,8.44E-03,0.103320842,3.06E-02,2.71E-02,6.42E-03,1.02E-02,0,4.46E-03,1.54E-03,0,0,4.03E-03,7.93E-02,7.24E-03,0,0,0.359208259,7.91E-03,9.93E-02,0.1003232,0.199631902,4.33E-02,0,5.25E-02,0.106278504,0,0.194169252,4.47E-02,7.03E-02,0,0,0,6.94E-02,7.01E-04,1.53E-02,0.113468193,0.221076784,2.20E-02,3.38E-03,3.23E-02,0,1.62E-03,9.46E-03,374,240,0,0,0,0,383,0,0,0,0,0.17,NA,NA,-0.230424072,-0.118166316,0.795624078,-0.299328933,4.20E-02,-0.27518029,-0.132789313,-0.562002205,0.238820503,0.759833964,-0.335783349,0.774896022,-0.626412557,-0.637548054,6356.338308,5859.178928,12215.51724,Not Examined,Present,Margins uninvolved by invasive carcinoma,NA,Slavonic,Ukraine,Other,Entire uterine cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,NA,NA,NA,NA,NA,NA,Yes,2,12 Months|24 Months,No|No,Living|Living,372|818,No|No,No|No,No|No,Tumor Free|Tumor Free,Never|Never,Never|Never,Never|Never,No|No,No|No,Unknown|Unknown,No|No,Complete 
Remission|Complete Remission,Complete Remission|Complete Remission,n/a|n/a,No|No,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,341.0|787.0,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-02436,C3N-02436,No,b4,14,131C,CPT0115530003,Tumor,No,51,Female,Endometrioid carcinoma,G1 Well differentiated,8,160,122,47.47,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,13,Stage I,White,7.67E-02,CNV_L,No,35.64,MSI-H,MSI-H,1253,Mutated,Mutated,WT,Mutated,WT,4.93E-02,2.46E-02,1.13E-19,0,2.30E-19,4.88E-18,5.68E-02,1.46E-03,1.74E-02,5.43E-02,3.13E-03,8.39E-03,2.50E-20,0,4.68E-02,0,4.46E-03,8.24E-18,1.14E-18,1.08E-02,3.04E-02,3.05E-02,1.36E-02,2.95E-03,0,7.53E-18,5.45E-03,1.46E-18,1.43E-17,3.20E-02,4.41E-02,8.30E-03,0,0.136260905,7.79E-02,1.31E-02,8.44E-02,4.12E-18,8.44E-02,0,0,1.57E-02,0.111668301,0,0.227725853,1.06E-02,3.43E-02,7.03E-02,0,3.08E-02,2.81E-02,0,0.107232895,7.35E-02,0.274861568,1.75E-03,0,1.35E-02,0,0,0,178,207,0,0,0,0,275,119,0,0,82,0.48,Yes,No,0.130519566,-0.197965889,0.586393386,-0.469053254,6.58E-02,-1.330719595,-1.093150363,-0.530102096,1.141768576,-1.051315758,-1.326021881,0.446566161,0.707129872,-1.509491247,4096.108384,5571.055976,9667.16436,Atypical and/or suspicious,Not identified,Cannot be assessed,Not-Hispanic or Latino,white,United States,Other,entire endometrial cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Premenopausal: less than 6 months since LMP AND no prior bilateral oophorectomy AND not on estrogen replacement,Never,1,ERG in A12: positive for lymphovascular invasion,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical 
Record|Medical Record|Medical Record,tylenol|ferrous sulfate|lisinopril-hydrochlorothiazide|potassium chloride CR|zoloft,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,332|740|1103|1502,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor Free,Never|Never|Never|Never,Unknown|Unknown|Unknown|Unknown,Never|Never|Never|Never,Yes|Yes|Yes|Yes,No|No|No|No,1|1|1|1,No|No|No|No,Not Applicable|Not Applicable|Not Applicable|Not Applicable,Complete Remission|Complete Remission|Not Applicable|n/a,n/a|n/a|n/a|Not Applicable,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,299.0|707.0|1070.0|1469.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-02437,C3N-02437,No,b1,1,128C,CPT0159110003,Tumor,No,61,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.8,163,156,58.87,Present,=50% myometrial invasion,Seventh Edition (2010),pT3a (FIGO IIIA),pN0,27,Stage III,White,0,CNV_L,No,8.52,MSI-H,MSI-H,323,WT,Mutated,Mutated,Mutated,Mutated,5.03E-02,0,5.42E-04,4.89E-18,4.13E-19,0,3.32E-19,4.65E-03,1.04E-02,2.81E-03,0,3.43E-19,3.57E-20,0,3.61E-03,1.72E-19,0,1.43E-17,0,1.67E-02,1.23E-19,0,1.11E-19,4.16E-03,0,0,2.39E-03,0,7.35E-18,2.29E-04,2.42E-02,1.50E-02,2.05E-03,4.23E-02,9.91E-02,1.34E-02,1.21E-02,7.34E-18,1.21E-02,0,1.48E-03,0,2.00E-02,0,0.28938527,0,5.16E-03,7.16E-03,0,0,5.78E-02,0,0.259374602,3.83E-02,0.155608113,2.98E-02,8.39E-02,0,2.84E-02,1.26E-02,1.10E-02,0,51,141,0,0,0,0,36,0,0,0,0.76,NA,NA,0.846722283,-0.101513466,-0.144841051,0.91979873,-0.17787433,0.787821688,0.806534529,0.473683518,1.193911538,-0.651134294,0.672034779,-1.17935931,0.537376115,1.365153275,3061.091476,3498.932238,6560.023713,Negative for malignancy/normal/benign,Not identified,Cannot be assessed,Not-Hispanic or Latino,white,United States,Other,anterior and posterior,Multifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,Prussian blue stain for iron: highlights hemosiderin (control positive); Kinyoun stain for mycobacteria: negative (control positive),NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical 
Record,aspirin|carvedilol|clonidine|cyproheptadine|furosemide|naproxen sodium|potassium chloride|valsartan,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,308|764|1036,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Yes|Yes|Yes,No|No|No,2|2|2,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,290.0|746.0|1018.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02595,C3N-02595,No,b4,15,130N,CPT0181810004,Tumor,No,41,Female,Endometrioid carcinoma,G3 Poorly differentiated,10,165,50,18,Present,=50% myometrial invasion,Seventh Edition (2010),pT3a (FIGO IIIA),pN1 (FIGO IIIC1),22,Stage IV,NA,1.371947271,CNV_L,No,26.97,MSI-H,MSI-H,1133,WT,Mutated,WT,Mutated,Mutated,9.03E-02,6.03E-03,7.52E-03,0,3.93E-18,2.97E-19,4.97E-03,5.57E-03,2.10E-04,2.26E-02,3.43E-18,1.76E-03,2.74E-02,2.12E-18,8.13E-03,5.30E-19,1.22E-19,0,1.46E-17,2.58E-02,1.28E-02,7.54E-03,4.20E-03,9.07E-03,0,2.46E-19,4.47E-03,0,0,1.11E-02,1.93E-02,8.40E-03,3.62E-19,2.62E-02,0.164645203,0,2.42E-02,2.65E-19,2.42E-02,4.14E-03,0,0.11398473,4.32E-02,0,0.214473912,0,3.41E-02,0,1.96E-02,0,3.62E-02,0,0.12359541,5.96E-02,0.240601901,3.81E-02,0,0,3.10E-02,1.90E-02,2.24E-02,69,6,424,0,0,0,0,173,0,0,104,0.34,NA,NA,-0.216454753,0.610482789,0.80700581,0.530336339,-0.566719449,0.98811317,0.523465296,6.99E-02,2.71E-02,-0.432048403,0.439976356,-0.620225453,-0.401425309,0.272969203,3858.489953,4137.281022,7995.770975,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Other,Fundus and lower part with isthmus - infiltration of the cervix,Multifocal,IHC staining not done,3,11,IHC staining not done,0,0,IHC staining not done,0,cM1,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Premenopausal: less than 6 months since LMP AND no prior bilateral oophorectomy AND not on estrogen replacement,Never,None,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Olanzapin|Pantoprazol|Exacyl|Cyclonamine|Hydroxyzinum|Clexane,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,489|721|980,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Unknown|Unknown|Unknown,Never|Never|Never,No|No|No,No|No|No,None|None|None,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,509.0|741.0|1000.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02598,C3N-02598,No,b3,12,131N,CPT0187960004,Tumor,No,60,Female,Endometrioid carcinoma,G2 Moderately differentiated,8,157,54,21,Present,=50% myometrial invasion,Seventh Edition (2010),pT4 ((FIGO IVA),pN2 (FIGO IIIC2),0,Stage IV,NA,3.83E-02,CNV_L,No,6.41,MSI-H,MSI-H,941,Mutated,Mutated,WT,Mutated,WT,9.62E-02,8.72E-05,4.48E-03,0,0,3.57E-18,3.91E-02,6.18E-05,8.79E-04,2.71E-02,1.23E-18,0,1.47E-02,0,8.97E-03,1.66E-02,6.19E-19,7.13E-02,0,3.09E-02,2.13E-02,1.36E-02,8.88E-03,7.09E-21,2.88E-19,1.09E-02,4.35E-03,2.28E-03,5.20E-18,3.30E-03,1.47E-02,6.50E-03,7.17E-18,7.55E-03,8.71E-02,1.87E-03,2.97E-02,4.64E-02,7.61E-02,0,0,4.11E-02,4.24E-02,0,0.291548358,0,1.26E-02,0,0,0,4.02E-02,0,6.08E-02,3.22E-02,0.144917984,9.65E-02,0,0,0.106095108,4.69E-02,8.47E-02,149,35,507,0,0,0,0,194,0,0,0,0.49,NA,NA,0.486631463,1.234906624,-1.228286315,1.321823493,-0.744782971,1.954233069,1.32086742,0.304020926,0.114762218,1.056982929,1.30256427,-0.555580639,1.436945537,1.452465137,5311.754482,4933.694273,10245.44875,Not Examined,Not identified,Margin(s) involved by invasive carcinoma,NA,Caucasian,Poland,Other,"Istmus, cervical canal and fundus and anterior wall of uterine corpus",Unifocal,0,0,6,1,1,1,0,0,cM1,R0: No residual tumor,Positive : % Not available,Negative,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Current smoker: Includes daily and non-daily smokers,Unknown,NA,20,NA,Yes,Self-reported and Medical Record,Yes,Medical Record|Medical Record,Citrafleet|Fragmin,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,267|659|1068,No|No|No,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,No|No|No,No|No|No,2|2|2,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,267.0|659.0|1068.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02631,C3N-02631,No,b3,11,128N,CPT0181960004,Tumor,No,77,Female,Serous carcinoma,G3 Poorly differentiated,8,156,75,30.82,Present,<50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN0,6,Stage II,NA,26.26196116,CNV_H,No,0.86,MSS,CNV_H,64,Mutated,WT,WT,WT,Mutated,0,0,4.91E-19,0,0,0,5.03E-18,8.80E-03,1.00E-17,0,2.21E-18,0,2.05E-02,0,0,6.27E-03,0,2.02E-02,0,0.103686034,0,4.42E-19,2.37E-04,2.88E-19,0,0,8.25E-19,8.38E-19,9.80E-18,3.86E-02,1.06E-02,1.24E-02,7.65E-18,5.82E-02,0.189308078,2.64E-18,1.40E-17,1.38E-02,1.38E-02,0.157999115,0,2.52E-02,4.79E-02,0,6.51E-03,0,0.134292989,0,0,0,7.99E-02,7.69E-04,3.93E-02,0,0.269409935,0,0.20074118,3.79E-02,0,0,0,13,48,0,0,0,0,0,0,0,0,0,0.47,Yes,No,-0.571092058,-1.151372585,-0.862799493,-1.536707086,-0.278985331,-0.542342851,-2.460955649,-1.061078949,0.503625473,-0.398774199,-2.501241837,-2.674330255,0.218722636,-2.383170511,2213.438711,865.788058,3079.226769,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Multifocal,IHC staining not done,0,8,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 5 %,Positive : 5 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,"Vimentin (+)-5%,p16 (+)-95%,CEA(+)-1%, CK5/6(+)-1%",NA,Yes,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Self Report|Self Report|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Beto ZK|Atoris|Glucophage |Polpril|Verospiron,Melanoma malignum,Self Report,Surgery,Unknown,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,291|679|1107,No|No|No,Yes|Yes|Yes,No|No|No,Tumor Free|With 
Tumor|Tumor Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,Yes|Yes|Yes,1|1|1,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Persistent Disease|n/a,n/a|n/a|Complete Remission,No|Yes|No,n/a|Distant Metastasis|n/a,n/a|Other: abdomen wall near umbilicus|n/a,n/a|679.0|n/a,n/a|No|n/a,n/a|No|n/a,n/a|No|n/a,n/a|No|n/a,291.0|679.0|1107.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02632,C3N-02632,No,b1,1,129N,CPT0182020004,Tumor,No,57,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.8,174,89,29,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,3,Stage I,NA,0,CNV_L,No,0.02,MSS,CNV_L,63,WT,Mutated,WT,Mutated,WT,2.95E-02,3.88E-03,0,4.77E-18,0,0,3.54E-18,0,1.16E-17,5.48E-20,3.48E-17,0,2.80E-03,1.87E-18,9.09E-23,0,0,1.18E-18,0,3.50E-02,8.83E-04,1.83E-04,4.80E-03,7.42E-03,3.50E-18,9.45E-04,6.69E-03,4.08E-02,2.68E-17,0,9.09E-03,6.40E-03,1.50E-18,6.09E-02,3.20E-02,5.30E-19,3.59E-02,5.90E-19,3.59E-02,0,7.20E-03,2.33E-04,8.20E-05,0,0.325105995,0,2.63E-02,2.31E-02,0,0,0.137976343,0,3.03E-02,0,3.59E-02,0,1.20E-02,0,6.40E-02,1.70E-02,0.320842759,18,17,0,0,0,0,22,0,0,0,0,0.56,NA,NA,-0.434784163,-0.24884144,-0.494035294,1.053296563,-0.48604084,-8.09E-02,1.640355071,0.380147841,0.865443951,-0.647115205,1.47754223,-0.37469067,1.152536238,0.877684072,3054.31647,4676.254457,7730.570926,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Euthyrox|Citrafleet|Fragmin|Cipronex|Natrium kalium|Dormicum,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months|48 Months,No|No|No|No,Living|Living|Living|Living,497|859|960|1405,No|No|No|No,No|No|No|No,No|No|No|No,Tumor Free|Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never|Never,Never|Never|Never|Never,Never|Never|Never|Never,No|No|No|No,No|No|No|No,Unknown|Unknown|Unknown|Unknown,No|No|No|No,Unknown|Unknown|Complete Remission|Complete Remission,Unknown|Complete Remission|Complete Remission|n/a,n/a|n/a|n/a|Complete Remission,No|No|No|No,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,510.0|872.0|973.0|1418.0,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a,n/a|n/a|n/a|n/a
+C3N-02635,C3N-02635,No,b4,13,127N,CPT0182070004,Tumor,No,56,Female,Endometrioid carcinoma,G3 Poorly differentiated,3,150,80,35,Present,=50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN1 (FIGO IIIC1),9,Stage II,NA,13.35596277,CNV_H,No,0.5,MSS,CNV_H,66,WT,Mutated,WT,WT,Mutated,0.115812689,2.67E-02,5.25E-20,5.88E-18,1.58E-19,8.31E-19,4.49E-18,3.86E-19,7.54E-19,0,9.99E-18,0,8.17E-03,0,1.34E-19,6.86E-03,0,6.20E-18,1.23E-18,6.24E-02,2.02E-02,2.33E-02,1.57E-02,1.76E-03,0,0,1.42E-03,0,0,1.28E-03,1.55E-02,3.92E-02,8.63E-18,6.46E-02,0.130536771,0,3.24E-02,3.79E-03,3.62E-02,0,7.52E-03,8.39E-02,3.94E-02,0,0.109467339,0,2.37E-02,0,0,0,3.38E-02,0,9.46E-02,1.41E-02,0.352609746,0,0.143849245,6.03E-02,0,0,3.68E-02,14,14,0,0,0,11,17,0,0,0,0,0.83,Yes,No,-0.728620323,-0.21165115,-6.94E-02,0.210879394,2.078698042,-0.708728998,-0.145974832,0.243789143,0.259294703,-0.791693064,-5.03E-02,-1.681788585,0.429710828,-1.149392898,3589.221054,3933.817101,7523.038154,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,IHC staining not done,1,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Siofor|enalapril|Vastan,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,330|619|1025,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Unknown|Unknown|Unknown,Never|Never|Never,Yes|Yes|Yes,Yes|Yes|Yes,3|3|3,No|No|No,Complete 
Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|Complete Remission,n/a|n/a|n/a,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,330.0|619.0|1025.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02636,C3N-02636,No,b4,14,127N,CPT0182150003,Tumor,No,72,Female,Endometrioid carcinoma,G1 Well differentiated,5.5,157,83,33,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,3,Stage I,NA,1.163072893,CNV_L,No,0.11,MSS,CNV_L,74,WT,WT,Mutated,Mutated,WT,2.68E-02,0,0,5.37E-20,0,4.48E-18,0,3.39E-03,6.22E-20,2.62E-03,0,1.16E-18,8.70E-03,6.32E-19,5.10E-20,1.77E-03,2.31E-19,6.96E-02,1.55E-18,8.65E-03,2.38E-21,1.75E-03,2.13E-04,1.24E-03,5.66E-19,0,2.47E-03,2.91E-04,0,9.28E-03,5.21E-03,7.97E-03,4.48E-18,3.37E-02,0.106936974,4.45E-18,1.02E-03,3.73E-02,3.84E-02,1.78E-02,0,4.00E-02,5.71E-02,0,0.180180067,0,4.22E-02,3.86E-02,0,0.117838441,0,3.84E-02,3.54E-02,2.36E-02,0.121293967,1.16E-02,4.19E-02,0,0.137790059,9.42E-02,2.20E-03,12,22,0,0,0,9,22,0,0,0,0,0.64,NA,NA,1.098775714,-0.170872415,-0.648246186,1.574011104,-0.398238555,0.467888856,8.84E-02,0.145085363,0.263673245,0.301099209,0.307669417,-4.41E-02,-0.668321467,0.925265962,3856.040287,2842.949468,6698.989755,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Other,Fundus (main tumor) and isthmus (implants),Multifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,Vimentin positive,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Tritace|Clexane|Citrafleet,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,361|676|1096,Yes|Yes|No,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,No|No|No,None|None|1,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,382.0|697.0|1117.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02637,C3N-02637,No,b4,15,131C,CPT0182210004,Tumor,No,53,Female,Endometrioid carcinoma,G2 Moderately differentiated,3,168,138,48,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pNX,0,Stage I,NA,4.45E-03,CNV_L,Yes,0.18,MSS,POLE,7455,Mutated,Mutated,WT,Mutated,Mutated,0.148903082,8.86E-02,9.68E-03,1.00E-03,0,0,6.56E-03,3.28E-03,0,2.46E-02,1.16E-17,1.67E-02,1.07E-02,0,1.62E-02,2.86E-03,4.92E-18,0,2.54E-19,2.34E-02,1.74E-02,2.30E-02,3.86E-04,1.10E-02,2.04E-02,0,2.83E-02,1.15E-19,1.63E-17,4.73E-02,9.35E-02,1.09E-02,4.24E-18,5.86E-02,0.200077682,1.93E-02,8.88E-02,1.43E-03,9.02E-02,5.41E-02,1.40E-02,1.68E-02,5.74E-02,0,0.154482517,0.101144466,7.98E-02,0,0,6.17E-02,0,0,8.35E-02,7.39E-02,0.207691672,0,0,0,6.11E-02,0,3.43E-02,485,0,0,0,2248,4706,0,0,0,0,0,0.65,NA,NA,-0.153843257,0.180211463,0.195008256,0.890892324,-1.25804392,1.127235908,0.520790353,-1.328790363,0.885768342,-1.117668355,0.266184135,6.17E-02,1.020523657,0.379026749,3953.376146,5441.827192,9395.203338,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 70 %,Positive : 80 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,Yes,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,"EMA (+), Vimentin (+), CD10 (+)",NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record,No,NA,NA,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,302|687|1135,Yes|No|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,No|No|No,No|No|No,1|1|1,No|No|No,Complete Remission|Complete Remission|Complete 
Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Patient Deceased,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,302.0|687.0|1135.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02639,C3N-02639,No,b4,14,130N,CPT0182290003,Tumor,No,59,Female,Endometrioid carcinoma,G3 Poorly differentiated,9,157,78,31,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,13,Stage I,NA,31.81337824,CNV_H,No,1.41,MSS,CNV_H,53,Mutated,WT,WT,WT,WT,6.77E-03,0,6.60E-20,4.04E-18,0,0,2.86E-18,8.21E-05,2.71E-18,4.39E-18,0,1.49E-18,1.02E-02,0,1.36E-19,3.02E-03,0,0,0,0.130277113,0,1.13E-03,0,1.51E-20,0,0,0,4.57E-19,6.20E-19,0,1.28E-02,7.74E-03,0,7.78E-03,0.166392433,6.78E-18,2.62E-18,6.05E-03,6.05E-03,2.90E-03,0,3.12E-02,3.13E-02,0,0,0,4.80E-02,0.11176199,0.100421579,1.70E-03,0,3.77E-02,0.109428011,0,5.06E-02,0,0.244140879,0.206347578,0,2.45E-02,0,14,24,0,12,0,0,0,0,0,0,0,0.75,NA,NA,0.767210751,2.36E-03,0.317344385,0.977599381,0.586863491,0.245999258,-1.26931856,0.398623514,1.560387177,-0.402292938,-1.053987618,-0.373247649,0.285155769,-0.823684333,3045.532716,2255.926224,5301.45894,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Multifocal,0,0,6,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : 10 %,Positive : 20 %,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,3,"EMA (+), Vimentin (+), CD10 (-), CK8/18(+),CK5/6(-),bcl2(+),SMA(-),DES(-)",NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Beto ZK|Nitrendypina|Tritace|Signopan|Solpadeine,NA,NA,NA,NA,Yes,1,12 Months|24 Months,No|No,Living|Deceased,294|703,Yes|Yes,Yes|Yes,Yes|Yes,With Tumor|Tumor Free,Never|Never,Never|Never,Never|Never,Yes|Yes,No|Yes,3|1,No|No,Persistent Disease|Persistent Disease,Persistent Disease|Persistent 
Disease,n/a|n/a,Yes|Yes,Distant Metastasis|Distant Metastasis,"Other: peritoneum, omentum majus, ascites|Other: peritoneum, omentum majus, ascites",324.0|324.0,No|No,No|No,Yes|Yes,No|No,294.0|n/a,n/a|Malignant Neoplasm,n/a|526.0,n/a|n/a,n/a|n/a,n/a|n/a
+C3N-02678,C3N-02678,No,b3,11,130C,CPT0182340004,Tumor,No,69,Female,Endometrioid carcinoma,G1 Well differentiated,5.5,164,66,24,Present,=50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN0,8,Stage II,NA,0,CNV_L,No,13.39,MSI-H,MSI-H,482,WT,Mutated,Mutated,Mutated,WT,3.74E-02,2.39E-04,0,1.32E-18,0,1.70E-19,7.54E-03,1.01E-19,1.61E-20,0,0,2.06E-18,2.34E-02,0,2.22E-03,6.29E-03,0,8.00E-03,0,4.63E-02,0,1.57E-04,4.53E-03,5.24E-03,0,0,1.66E-04,5.08E-19,5.37E-18,8.01E-03,2.89E-02,1.30E-02,4.87E-21,2.94E-02,6.51E-02,4.02E-03,5.13E-03,8.38E-03,1.35E-02,1.29E-02,0,0.186416001,0,0,0.299349686,2.43E-02,0,0,0,0,0,2.23E-02,0.283531578,1.28E-02,0.104148659,1.85E-02,0,1.99E-02,0,0,1.59E-02,56,57,0,0,0,0,247,0,0,0,0,0.46,NA,NA,0.931996304,-0.234767653,-0.529491834,0.715090167,-1.207887158,0.438337251,4.22E-02,0.16601276,1.021736144,0.611077934,0.259159624,6.02E-02,0.151047993,1.262981778,3790.01119,3673.92532,7463.93651,Not Examined,Present,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,1,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,Unknown,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Concor|Tulip|Afobam|Zyrtec,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Deceased,266|707|580,Yes|Yes|Yes,No|No|No,No|No|No,With Tumor|With Tumor|With 
Tumor,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,No|No|No,Unknown|Unknown|2,No|No|No,Persistent Disease|Persistent Disease|Complete Remission,Persistent Disease|Persistent Disease|n/a,n/a|n/a|Patient Deceased,Yes|Yes|Yes,Distant Metastasis|Distant Metastasis|Distant Metastasis,Lung|Lung|Lung,266.0|545.0|545.0,No|No|No,No|No|No,Yes|No|Yes,No|No|No,266.0|707.0|n/a,n/a|n/a|Uterine Corpus Cancer,n/a|n/a|598.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02789,C3N-02789,No,b4,13,131N,CPT0186010004,Tumor,No,61,Female,Endometrioid carcinoma,G2 Moderately differentiated,3.5,147,56,25.7,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,17,Stage II,White,NA,NA,No,12.08,MSI-H,MSI-H,420,WT,Mutated,Mutated,Mutated,WT,0.141682818,1.13E-02,0,2.58E-18,0,5.61E-18,1.31E-18,6.96E-03,2.75E-02,4.14E-02,0,2.36E-03,2.05E-02,1.90E-18,4.20E-03,0,0,0,1.70E-18,6.02E-04,3.89E-02,2.44E-02,1.94E-02,1.50E-03,0,2.39E-03,9.87E-03,6.66E-03,0,5.42E-03,3.69E-02,2.11E-03,5.83E-18,1.57E-02,0.183682185,0,6.16E-02,4.00E-19,6.16E-02,0,0,3.47E-03,3.86E-02,0,0.144920809,5.21E-03,2.55E-02,0,2.58E-02,0,2.70E-02,0,8.46E-02,7.32E-02,0.317682796,2.23E-02,0,0,9.71E-02,5.60E-02,7.85E-02,13,16,169,0,0,0,0,58,0,0,48,NA,NA,NA,0.552801952,0.918609667,-3.34E-02,-0.767812714,0.630192871,1.229641589,1.659742073,1.123896171,1.512568695,-0.490776871,1.38183436,-0.651425363,0.749161776,0.274117739,3305.802106,4866.046928,8171.849034,Positive for malignancy,Present,Cannot be assessed,Not-Hispanic or Latino,white,United States,Other,entire cavity,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM1,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,Unknown,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Current Therapy,1,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,"Current reformed smoker, more than 15 years",20,21,2,0.1,Exposure to secondhand smoke history not available,NA,Yes,Medical Record|Medical Record|Medical Record,calcium carbonate|lysine|Multivitamin,NA,NA,NA,NA,No,2,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,285|725|1137,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor 
Free,Former Therapy|Former Therapy|Former Therapy,Former Therapy|Former Therapy|Former Therapy,Unknown|Unknown|Unknown,No|No|No,No|No|No,1|1|1,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,255.0|695.0|1107.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02947,C3N-02947,No,b2,8,128N,CPT0158520008,Tumor,No,51,Female,Endometrioid carcinoma,G2 Moderately differentiated,5.5,170,65,22.39,Present,=50% myometrial invasion,Seventh Edition (2010),pT1 (FIGO I),pN0,2,Stage I,White,0,CNV_L,No,30.36,MSI-H,MSI-H,921,Mutated,Mutated,WT,Mutated,Mutated,7.35E-02,2.04E-02,8.11E-03,0,1.88E-20,4.85E-18,3.68E-02,8.23E-03,4.32E-03,0,0,4.71E-03,2.74E-02,0,0,3.40E-03,0,0,0,1.28E-02,7.26E-03,7.76E-03,5.17E-03,4.51E-03,0,0,6.09E-03,0,1.44E-17,1.34E-02,2.71E-02,2.15E-02,0,4.86E-02,0.194891097,3.70E-19,2.43E-02,1.70E-03,2.60E-02,1.69E-02,0,6.58E-02,0.119800608,0,0.232481626,4.00E-02,0.100749453,0,0,0,2.42E-02,6.32E-03,0.168448027,2.04E-02,5.51E-02,0,5.57E-02,0,9.39E-02,1.44E-04,0,60,0,364,0,0,0,0,115,0,0,91,0.57,NA,NA,-1.019201651,1.030815245,-1.253029177,0.700607819,-1.151180079,1.29795756,0.897221983,-0.317105163,1.2015646,-4.09E-02,0.850381647,-0.936510283,-7.70E-02,-0.670449534,3107.034864,4311.345097,7418.379961,Positive for malignancy,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,white,United States,Other,anterior and posterior,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Premenopausal: less than 6 months since LMP AND no prior bilateral oophorectomy AND not on estrogen replacement,Unknown,None,NA,NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Current smoker: Includes daily and non-daily smokers,Unknown,NA,5,NA,Exposure to secondhand smoke history not available,Medical Record,Yes,Medical Record|Medical Record,acetaminophen |hydrocodone-acetaminophen,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 
Months,No|No|Yes,Living|Living|Living,331|856|856,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,No|No|No,No|No|No,None|None|None,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,331.0|856.0|856.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02972,C3N-02972,No,b4,15,131N,CPT0159220008,Tumor,No,74,Female,Endometrioid carcinoma,G2 Moderately differentiated,4.1,152,88,38.08,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN1 (FIGO IIIC1),19,Stage III,White,0,CNV_L,No,0.03,MSS,CNV_L,77,Mutated,Mutated,WT,Mutated,Mutated,0.11022494,6.88E-04,1.55E-19,0,1.54E-19,0,0,3.90E-19,2.32E-18,8.85E-04,9.31E-18,1.26E-02,6.92E-03,1.50E-18,3.79E-03,4.27E-03,4.92E-20,4.07E-18,0,4.22E-02,3.72E-03,7.21E-03,4.05E-03,1.76E-03,0,4.09E-18,5.44E-20,1.11E-03,0,2.97E-18,1.73E-02,8.54E-04,2.02E-19,4.50E-02,4.79E-02,2.55E-19,7.38E-03,2.14E-03,9.52E-03,0,0,8.36E-03,6.10E-02,0,0.245884886,0,4.87E-02,1.69E-02,0,0,7.72E-02,1.21E-02,0,2.99E-02,0.239081182,0,4.08E-02,0,0.118302791,3.62E-02,6.54E-02,30,37,0,0,0,0,0,0,0,0,0,0.26,Yes,Yes,-8.51E-02,1.335986316,0.430705105,0.985335445,1.352598595,1.266359789,1.749552285,1.33586285,0.185018664,-4.60E-02,1.84529309,0.477400599,1.069508234,1.19664878,4104.78479,4741.808055,8846.592846,Negative for malignancy/normal/benign,Not identified,Margins uninvolved by invasive carcinoma,Not-Hispanic or Latino,white,United States,Other,Anterior and Posterior,Unifocal,IHC staining not done,1,3,IHC staining not done,0,0,IHC staining not done,0,Staging Incomplete,RX: Presence of residual tumor cannot be assessed,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,Unknown,Cytokeratin polytypic: positive ;PAX 8: positive;MOC31: negative;WT-1: negative; Vimentin: positive ; P16: mosaic pattern,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,Exposure to secondhand smoke history not available,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical 
Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,acetaminophen-codeine|aspirin |ducolax|zyrtec|Vitamin D|celexa|glucotrol|hydrochlorothiazide|lisinopril|magnesium oxide|metformin|metoprolol|multi-vitamin (centrum women)|prilosec|pravachol,NA,NA,NA,NA,Yes,2,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,293|874|1202,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Unknown|Unknown|Unknown,Yes|Yes|Yes,Yes|Yes|Yes,Unknown|Unknown|Unknown,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,251.0|832.0|1160.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02976,C3N-02976,No,b2,7,129C,CPT0188030003,Tumor,No,70,Female,Endometrioid carcinoma,G3 Poorly differentiated,3,162,71,27,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,5,Stage I,NA,1.93E-03,CNV_L,No,36.12,MSI-H,MSI-H,941,WT,Mutated,WT,Mutated,WT,6.33E-02,5.21E-02,1.05E-03,2.43E-18,0,7.86E-18,3.93E-18,1.20E-02,0,4.02E-19,0,0,5.70E-02,0,1.82E-02,2.09E-03,2.20E-18,6.16E-18,7.10E-19,3.76E-02,7.72E-03,2.86E-03,3.12E-03,4.92E-03,0,2.38E-17,5.80E-03,0,1.32E-17,1.53E-02,3.00E-02,2.63E-02,1.53E-02,0.147520061,0.179113386,2.46E-19,5.53E-02,1.04E-03,5.63E-02,8.60E-02,0,0.155100744,6.62E-02,0,9.28E-02,2.26E-02,0.12414492,0,0,0,8.89E-02,5.80E-03,0,0.125798131,0.206497607,2.62E-02,0,0,0,0,0,47,136,198,0,0,0,251,0,0,0,0,0.87,NA,NA,-1.406703087,-0.838490699,-2.850650419,0.232913116,-1.143521854,3.05E-02,-2.005926455,-0.301137126,1.390983307,-0.79777691,-2.206948571,-0.939906875,1.65E-02,-1.87197721,1502.789371,3084.663181,4587.452552,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,IHC staining not done,0,6,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Beto 2K|Fragmin|Dormicum|Fortrans|Kalipoz,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,364|670|1076,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,No|No|No,2|2|2,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,381.0|687.0|1093.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02978,C3N-02978,No,b4,16,131C,CPT0244470003.1,Tumor,No,66,Female,Endometrioid carcinoma,G2 Moderately differentiated,5.5,166,87,31,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,11,Stage I,NA,1.414147276,CNV_L,No,20.85,MSI-H,MSI-H,605,WT,Mutated,WT,Mutated,WT,3.49E-02,0,0,0,0,2.10E-17,1.77E-17,0,0,9.12E-19,1.19E-17,0,2.45E-02,5.44E-18,6.77E-20,2.37E-03,4.02E-18,1.66E-18,0,0,1.51E-02,9.18E-03,5.58E-03,2.14E-03,5.04E-20,1.35E-18,2.34E-03,1.37E-03,6.23E-18,2.99E-04,1.25E-02,1.11E-02,3.16E-17,5.13E-02,0.152476144,0,1.24E-02,3.15E-03,1.55E-02,0,0,4.88E-02,0,0,7.28E-02,0,4.28E-02,7.67E-03,0,0,1.06E-02,0,0.345744699,0,0.172960294,0,5.13E-02,0,0.143478492,4.80E-02,5.59E-02,16,0,244,0,0,0,0,86,0,0,69,0.79,Yes,No,0.110689746,-0.349973682,0.346445396,2.104449186,-0.25644425,-0.358640898,1.011673668,-0.676356199,0.862513819,-1.584755312,1.053743805,-1.272417748,2.435781627,-0.40816562,2505.531431,3522.469272,6028.000703,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,0,0,0,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,None,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record,Siofor|Faxolet ER 75|Ketiplet|Bisocard,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,253|665|1045,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Unknown|Never|Never,Never|Never|Never,No|No|No,Yes|No|No,None|None|None,No|No|No,Complete Remission|Complete 
Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,253.0|665.0|1045.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02978-b1,C3N-02978,Yes,b3,12,129C,CPT0244470003,Tumor,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02979,C3N-02979,No,b2,6,130C,CPT0188100004,Tumor,No,61,Female,Endometrioid carcinoma,G3 Poorly differentiated,4,158,69,27,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,10,Stage I,NA,6.52E-04,CNV_L,No,30.64,MSI-H,MSI-H,1066,WT,Mutated,WT,Mutated,Mutated,0.186319434,3.23E-02,1.55E-02,0,0,5.86E-18,0,1.77E-02,6.83E-03,1.24E-02,4.67E-03,1.16E-03,2.49E-02,9.95E-19,5.09E-03,6.92E-03,2.33E-18,0,1.44E-17,1.20E-02,3.13E-02,3.85E-02,1.52E-03,8.79E-03,8.35E-20,0,1.25E-02,9.68E-20,0,2.76E-02,0.132744518,3.13E-02,0,0.193014788,0.327562554,7.38E-18,5.62E-02,3.46E-03,5.96E-02,1.32E-02,0,1.78E-02,0.105063395,0,0.168575072,7.18E-02,5.81E-02,0,0,4.96E-02,3.35E-02,0,4.23E-02,0.136629496,0.27830002,0,0,1.73E-03,0,0,2.32E-02,0,181,403,0,0,0,0,203,0,0,0,0.73,Yes,No,-1.558987383,-1.191964141,8.39E-02,0.199729184,-0.519301301,-1.31E-02,-0.282597715,-2.07108605,1.11361067,-1.13035857,-0.702183912,-0.756419985,7.45E-02,-1.467945044,3885.812031,5133.685775,9019.497805,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,0,0,4,0,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,"CK7 - negative, CK19 - negative, Chromogranine - negative, CD10 - negative, Synaptophizine - negative",NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record|Medical Record|Medical Record|Medical Record,Noliprol forte|Bisocard|Glucophage|vasilip|Fragmin|Citrafleet,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 
Months,No|No|No,Living|Living|Living,393|647|976,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,Yes|Yes|Yes,2|2|2,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,435.0|689.0|1018.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-02994,C3N-02994,No,b3,11,131N,CPT0248000003,Tumor,No,68,Female,Clear cell carcinoma,G3 Poorly differentiated,2.5,162,73,27,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,16,Stage I,NA,1.664379605,CNV_L,No,32.2,MSI-H,MSI-H,1321,Mutated,Mutated,WT,Mutated,Mutated,0.228347918,2.20E-02,5.80E-22,1.63E-18,0,1.59E-18,3.70E-02,1.28E-03,0,0,0,2.19E-03,3.72E-02,0,2.41E-03,1.58E-03,5.91E-20,4.35E-18,0,0,7.37E-02,6.10E-02,2.87E-02,0,6.14E-19,1.21E-02,6.61E-03,0,5.07E-18,8.17E-02,4.64E-02,7.29E-03,1.26E-02,0.151115485,0.10084028,0,7.34E-02,3.15E-03,7.66E-02,1.35E-03,0,5.75E-02,5.95E-02,0,0.108649589,2.04E-02,7.76E-02,0,6.94E-03,0,6.12E-02,7.71E-02,7.83E-02,0.100178654,0.272419956,0,3.05E-02,0,1.69E-02,0,3.13E-02,470,177,0,0,0,0,369,0,0,0,0,0.51,NA,NA,-2.131794543,0.996114937,-2.512408626,1.397748065,2.192912044,0.380419575,0.75742671,0.973177248,0.686184746,-0.674988569,0.54577056,-0.317470907,0.509482109,-1.552645943,3840.106201,5653.733119,9493.839321,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Fundus,NA,Unifocal,IHC staining not done,0,1,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Negative,Negative,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,Yes,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,1,EMA (+),NA,No,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record,No,NA,NA,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,337|725|980,Yes|Yes|Yes,Yes|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,No|No|No,No|No|No,1|1|1,No|No|No,Complete Remission|Complete 
Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,337.0|725.0|980.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-03005,C3N-03005,No,b2,5,129N,CPT0247940003,Tumor,No,>=90,Female,Endometrioid carcinoma,G2 Moderately differentiated,4,155,78,32,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,4,Stage I,NA,7.504663231,CNV_L,No,0,MSS,CNV_L,78,WT,Mutated,WT,WT,WT,0.136934637,6.63E-02,4.89E-03,0,0,3.28E-18,7.46E-02,9.11E-20,5.18E-03,2.82E-02,8.07E-19,3.44E-03,4.69E-02,0,1.15E-02,1.51E-02,0,1.45E-02,0,3.15E-02,2.90E-02,2.12E-02,2.06E-02,5.57E-03,3.12E-04,2.42E-04,0,5.78E-20,0,5.50E-03,1.89E-02,6.08E-02,0,3.47E-02,0.132437016,1.10E-19,7.85E-02,1.70E-02,9.54E-02,0,0,6.39E-02,9.01E-02,0,0.219648814,1.06E-02,4.97E-02,9.97E-03,0,7.07E-02,0,9.79E-03,9.54E-02,1.93E-02,0.208454493,5.24E-02,4.04E-02,0,3.38E-02,0,2.59E-02,21,27,0,0,0,0,26,0,0,0,0,0.31,NA,NA,1.09266565,-0.553515684,0.556683753,0.378119839,1.156744063,-1.497099808,0.488121317,0.740961007,0.606740987,0.377499539,0.47328784,1.241677203,-0.114117071,-0.541184904,5460.544855,5773.711888,11234.25674,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,4 or more,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Bisocard|Hygroton|Polocard,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Deceased,450|734|857,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,No|No|No,4 or more|4 or more|4 or more,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,450.0|734.0|n/a,n/a|n/a|Respiratory Disease,n/a|n/a|898.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-03415,C3N-03415,No,b2,7,130N,CPT0247910003,Tumor,No,85,Female,Endometrioid carcinoma,G3 Poorly differentiated,4.8,150,52,23,Present,=50% myometrial invasion,Seventh Edition (2010),pT1b (FIGO IB),pN0,5,Stage I,NA,0.146408992,CNV_L,No,32.41,MSI-H,MSI-H,6659,Mutated,Mutated,Mutated,Mutated,WT,0.209530632,1.80E-02,1.15E-02,0,7.17E-19,1.54E-18,7.86E-20,1.08E-02,5.20E-03,2.23E-02,0,1.85E-03,3.77E-02,0,1.62E-02,2.22E-19,1.43E-18,6.92E-18,0,7.01E-04,3.68E-02,1.76E-02,3.25E-02,5.86E-03,0,1.00E-18,3.88E-03,1.98E-18,0,1.65E-02,7.34E-02,2.24E-02,7.77E-03,0.151379251,0.314025643,0,5.47E-02,3.71E-18,5.47E-02,6.94E-03,0,3.52E-02,0.12580074,0,0.118640627,9.70E-02,5.93E-02,0,2.55E-02,0,6.34E-02,0,0,6.95E-02,0.268245225,1.69E-02,0,0,0.101829592,0,1.16E-02,161,314,0,0,0,0,0,0,1939,2445,1221,0.23,NA,NA,-1.136963415,-1.024358542,-2.286263556,1.218158501,-1.23439774,5.22E-02,-2.15E-02,-0.230024824,1.608784387,-2.005919563,-0.474275759,-0.900573473,1.600195685,-1.523080234,2384.810301,4565.22009,6950.030391,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,3,IHC staining not done,0,0,0,0,cM0,R0: No residual tumor,Positive : % Not available,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,Unknown,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Enarenal|Sustonit|Signopam,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Deceased,411|712|870,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,Tumor Free|Tumor Free|Tumor 
Free,Never|Never|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,No|No|No,Unknown|Unknown|Unknown,No|No|No,Complete Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,411.0|712.0|n/a,n/a|n/a|Respiratory Disease,n/a|n/a|1047.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-03417,C3N-03417,No,b4,13,128C,CPT0247880003,Tumor,No,67,Female,Endometrioid carcinoma,G1 Well differentiated,2.7,165,83,30.49,Present,<50% myometrial invasion,Seventh Edition (2010),pT1a (FIGO IA),pN0,5,Stage I,NA,1.04E-02,CNV_L,No,0.64,MSS,CNV_L,60,WT,Mutated,Mutated,Mutated,Mutated,3.73E-02,5.10E-03,1.87E-03,0,0,1.32E-17,1.41E-18,3.53E-03,4.49E-20,1.78E-02,0,1.77E-18,2.36E-02,0,0,0,0,0,6.46E-18,5.80E-03,1.34E-19,9.26E-19,1.96E-03,0,0,1.41E-17,1.39E-03,0,3.10E-18,1.63E-02,3.06E-02,5.60E-03,0,6.02E-02,0.289266356,2.25E-18,3.40E-03,0,3.40E-03,5.66E-02,0,9.17E-02,0.115563316,0,0.246878938,3.77E-02,7.77E-02,0,0,6.79E-02,0,0,0.112992327,6.16E-02,8.37E-02,1.10E-02,0,0,2.18E-02,1.49E-02,0,22,21,0,14,0,0,0,0,0,0,0,0.77,NA,NA,-0.684400881,0.561581065,-1.102347356,-8.92E-02,0.222665419,0.924786309,-1.105861496,-0.786586684,0.280656974,-0.713629763,-1.262334267,-1.974988541,1.698376833,0.241199831,2608.1265,3031.213167,5639.339667,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Posterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Positive : % Not available,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Loss of expression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Unknown,3,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,Medical Record|Medical Record|Medical Record,Yes,Medical Record|Medical Record|Medical Record,Bisoratio|Protevasc|Acard,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Living,346|710|1103,Yes|Yes|Yes,No|No|No,No|No|No,Tumor Free|Tumor Free|Tumor Free,Unknown|Unknown|Never,Never|Never|Never,Never|Never|Never,Yes|Yes|Yes,No|No|No,3|3|3,No|No|No,Complete 
Remission|Complete Remission|Complete Remission,Complete Remission|Complete Remission|n/a,n/a|n/a|Complete Remission,No|No|No,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,346.0|710.0|1103.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3N-03767,C3N-03767,No,b2,5,128N,CPT0247970003,Tumor,No,66,Female,Serous carcinoma,G3 Poorly differentiated,13,158,62,24,Present,=50% myometrial invasion,Seventh Edition (2010),pT2 (FIGO II),pN0,13,Stage II,NA,34.44230553,CNV_H,No,1.01,MSS,CNV_H,66,WT,WT,WT,WT,WT,7.67E-02,1.43E-02,0,4.98E-03,3.27E-19,8.11E-04,3.17E-02,5.32E-04,0,7.01E-20,3.05E-19,9.28E-03,0,1.42E-03,0,1.81E-02,0,1.06E-17,0,9.18E-03,2.56E-02,1.12E-02,1.70E-02,1.08E-02,2.18E-18,1.78E-03,5.02E-03,3.23E-19,0,3.02E-02,2.38E-02,9.99E-03,2.03E-18,4.92E-02,0.138446294,8.13E-19,3.50E-02,9.04E-03,4.40E-02,2.81E-02,0,2.94E-02,0.131642052,0,0,0,4.53E-03,0.100298365,9.25E-02,2.59E-02,0,2.34E-03,0.206982853,0,0.242741027,0,8.51E-02,0,1.92E-02,4.46E-03,2.68E-02,20,22,0,0,0,0,0,0,0,21,0,0.45,NA,NA,-1.129793055,1.027499579,-1.768589342,1.244398301,-1.277384025,0.623079578,-0.722419304,-0.979551296,-0.566171478,-0.655647341,-0.817136634,-0.858177651,1.45056114,-1.726708429,2578.671466,3457.536356,6036.207822,Not Examined,Not identified,Margins uninvolved by invasive carcinoma,NA,Caucasian,Poland,Anterior endometrium,NA,Unifocal,IHC staining not done,0,0,IHC staining not done,0,0,IHC staining not done,0,cM0,R0: No residual tumor,Negative,Negative,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,No,Cannot be determined,No,Postmenopausal: greater than 12 months since LMP with no prior Oophorectomy OR Prior bilateral oophorectomy,Never,2,NA,NA,No,Lifelong non-drinker,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,NA,NA,NA,NA,No or minimal exposure to secondhand smoke,NA,No,NA,NA,NA,NA,NA,NA,Yes,1,12 Months|24 Months|36 Months,No|No|No,Living|Living|Deceased,491|721|897,Yes|Yes|Yes,Yes|Yes|Yes,No|No|No,With Tumor|With Tumor|With Tumor,Never|Never|Never,Never|Never|Never,Never|Never|Never,No|No|No,No|No|No,2|2|2,No|No|No,Persistent Disease|Persistent Disease|Persistent Disease,Persistent Disease|Persistent Disease|n/a,n/a|n/a|Persistent 
Disease,Yes|Yes|Yes,Distant Metastasis|Distant Metastasis|Distant Metastasis,Lung|Lung|Lung,491.0|491.0|491.0,No|No|No,No|No|No,No|Yes|Yes,No|No|No,491.0|721.0|n/a,n/a|n/a|Malignant Neoplasm,n/a|n/a|1002.0,n/a|n/a|n/a,n/a|n/a|n/a,n/a|n/a|n/a
+C3L-00935-A,C3L-00935,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,8.87E-02,0,0,0,4.75E-18,0,4.89E-18,9.67E-03,1.09E-03,9.58E-03,5.63E-18,0,1.54E-02,4.97E-03,5.81E-03,0.120966395,0,0.504130901,1.83E-02,0.409030325,0,0,9.24E-04,0,0,0,0,0,0,0,0,7.02E-18,1.26E-17,5.67E-03,9.89E-02,0,4.60E-03,0.319778855,0.324382239,8.84E-02,0,3.65E-02,4.72E-02,0,0.148954717,0,4.14E-02,0,5.57E-02,0,5.62E-02,0,9.39E-02,4.29E-02,0.144343921,3.27E-02,0,0.211995968,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.211046408,-1.163818206,0.20088871,-1.271131044,0.656300892,-0.905376669,-0.302948854,-1.066028656,-2.124537065,0.943415918,-0.365934904,0.821306291,-1.985280952,-0.355157133,6048.885334,4277.403784,10326.28912,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-01277-A,C3L-01277,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3.26E-02,0,6.68E-19,3.42E-19,9.56E-20,6.75E-03,0,1.71E-02,8.33E-03,2.22E-19,2.12E-18,0,1.43E-02,2.30E-03,6.76E-19,0.210487924,1.36E-19,0.732908677,2.56E-02,0.652074717,0,4.40E-03,2.38E-03,1.20E-02,0,6.41E-18,0,0,7.02E-18,3.13E-02,1.88E-17,3.81E-18,2.09E-20,0,4.40E-02,0,1.36E-02,0.489786717,0.503358739,8.42E-02,0,1.23E-02,1.69E-02,0,0.288982746,0,6.66E-03,0,4.17E-02,0,5.03E-02,0,0,3.53E-03,0.248263085,0,7.70E-03,0.239400582,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-1.414135341,-1.020306365,-0.16799745,-1.532624523,-1.052620735,-0.863345831,-1.625708517,-1.829445817,-0.793469855,1.527938946,-1.844476202,0.504574412,-0.739065574,-1.986846148,6711.035705,4219.232716,10930.26842,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-01639-A,C3L-01639,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,9.51E-02,1.27E-18,2.41E-18,6.70E-03,1.27E-17,2.19E-03,6.43E-03,4.98E-03,6.63E-03,1.34E-02,1.43E-18,0,1.03E-02,9.42E-03,1.04E-02,0.140680315,5.28E-03,0.538364102,1.73E-02,0.445838095,0,0,5.73E-03,3.55E-20,2.21E-18,0,7.79E-19,0,4.61E-18,7.45E-03,0,3.96E-18,6.94E-19,1.03E-03,2.07E-02,2.70E-03,1.49E-02,0.352513988,0.367374104,7.97E-02,0,6.61E-02,5.99E-02,0,0.184571918,0,4.02E-02,0,2.33E-02,0,0.105593768,1.80E-03,5.71E-03,4.35E-02,0.177980921,3.66E-02,0,9.61E-02,6.56E-02,0,1.32E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.253623114,0.326370073,0.903270377,-1.10153486,-0.29478359,-0.297765172,0.587509746,-0.290579393,-2.813528775,1.350366552,0.625322857,1.027936076,-1.373760636,0.452931332,6912.831464,5165.054101,12077.88557,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-01967-A,C3L-01967,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,2.51E-18,0,0,0,0,1.75E-02,0,0,0,1.55E-17,1.89E-02,3.71E-03,0,0.194065816,1.68E-18,0.653919656,5.87E-02,0.727097981,4.03E-18,0,0,0,0,1.06E-17,0,0,0,0,2.49E-17,2.26E-18,5.04E-18,0,3.20E-02,5.93E-19,1.09E-17,0.435238368,0.435238368,2.20E-02,0,3.93E-03,5.64E-02,0,0.235822388,0,2.54E-02,0,6.07E-02,0,5.92E-02,0,0,7.13E-03,0.213152393,0,0,0.316206686,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.498955371,-1.794032951,-0.437990693,-1.550438395,-0.501700787,-1.504175763,-1.683139964,-1.677800851,-2.247384885,1.092535941,-1.739202793,5.31E-02,-0.63537576,-1.414576593,6344.945644,3855.2529,10200.19854,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02121-A,C3L-02121,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3.60E-02,0,7.87E-18,0,0,0,1.11E-17,4.01E-03,6.23E-03,1.65E-19,3.88E-18,1.68E-17,3.03E-02,9.22E-03,0,0.198581286,6.29E-18,0.600784169,3.40E-02,0.726487652,9.79E-19,9.63E-04,0,1.08E-03,1.29E-18,0,4.16E-18,3.44E-18,7.52E-18,2.15E-19,1.80E-17,1.06E-18,4.55E-18,1.25E-20,3.59E-02,4.92E-18,4.87E-03,0.410394644,0.415266171,0,0,0,2.19E-02,0,8.17E-02,0,0,0,4.96E-02,0,3.65E-02,4.16E-02,0,0,0.286802129,0,1.11E-02,0.470840163,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-1.029138117,-2.283354488,0.784495524,-1.626815622,-0.568082443,-2.144657328,-2.230521597,-2.521069728,-0.39842562,1.332206765,-2.33375145,0.33630669,-0.968764978,-2.306827196,6867.623231,4085.415341,10953.03857,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02216-A,C3L-02216,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,6.76E-02,0,0,2.40E-18,2.13E-17,0,3.95E-18,1.33E-02,4.72E-03,2.61E-03,0,4.06E-18,1.75E-02,4.72E-03,2.93E-03,9.29E-02,0,0.616006359,1.92E-03,0.564808115,0,3.11E-03,2.45E-03,5.75E-22,0,4.78E-18,2.62E-18,3.15E-04,0,3.26E-02,5.49E-18,7.71E-19,0,0,2.06E-03,1.63E-19,5.31E-03,0.361027326,0.366338058,8.05E-02,0,3.61E-02,4.47E-02,0,0.205984969,0,2.51E-02,0,0,0,6.05E-02,6.16E-02,0,1.50E-02,0.224371107,0,3.29E-03,0.217911571,1.62E-02,0,8.61E-03,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.129353198,-0.321850488,0.849211729,-1.218297996,-0.404627678,-0.554390972,0.287246403,-0.523223047,-2.125387181,1.425969954,0.182025103,0.766235421,-1.339316658,0.227294681,7125.299823,4777.841893,11903.14172,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02347-A,C3L-02347,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,5.44E-02,0,5.63E-18,2.35E-18,0,0,0,1.15E-02,4.17E-18,2.35E-19,2.26E-18,0,1.58E-02,6.17E-03,1.44E-19,0.251570738,8.02E-19,0.663621454,2.28E-02,0.841734884,3.27E-19,0,0,2.99E-19,0,0,0,0,2.17E-18,2.84E-19,0,7.88E-18,2.50E-17,0,2.51E-02,0,5.28E-18,0.471753809,0.471753809,0,0,9.32E-03,5.91E-02,0,0.185742456,0,0,0,3.04E-02,0,4.88E-02,1.08E-02,0,1.08E-02,0.30523825,4.07E-02,2.76E-03,0.288122833,0,0,8.40E-03,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-1.248399119,-2.079062633,-0.473255663,-1.453244388,-0.26348033,-1.617547424,-1.419313665,-1.297005685,-2.21004854,1.301062732,-1.502534757,0.530110014,-1.494404074,-1.797755369,6770.680361,4164.60188,10935.28224,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02353-A,C3L-02353,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2.39E-03,0,2.11E-21,3.39E-19,0,0,2.41E-17,6.05E-03,6.91E-19,3.70E-03,4.66E-18,0,7.02E-03,4.58E-03,2.03E-19,0.196339531,7.09E-18,0.619811332,2.17E-02,0.613034072,2.35E-19,0,0,8.56E-19,0,0,0,0,0,0,1.38E-17,0,0,0,2.30E-02,2.79E-18,6.05E-18,0.417245935,0.417245935,3.46E-02,0,2.64E-02,8.18E-02,0,0.228455458,0,1.61E-02,0,1.36E-02,0,5.06E-02,5.91E-03,0,5.70E-03,0.200181509,0,1.56E-02,0.321051161,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.271352996,-0.967337914,0.47433009,-1.450767193,0.134565201,-0.845521523,-0.713857878,-0.756074638,-2.413510083,1.286906112,-0.647963762,0.673471594,-0.601734164,-0.514545962,6425.985404,3846.190997,10272.1764,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02408-A,C3L-02408,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.131210134,0,0,0,1.98E-17,0,3.07E-03,1.26E-02,1.73E-02,8.71E-03,9.13E-18,0,1.64E-02,0,1.16E-02,0.133147777,9.13E-03,0.612616439,5.34E-02,0.766076189,1.35E-03,4.38E-03,1.16E-02,5.25E-03,1.07E-17,9.38E-03,0,1.94E-04,1.33E-17,0.110667849,4.24E-17,4.45E-20,0,8.63E-19,8.10E-02,7.44E-19,3.61E-02,0.37711775,0.413245881,2.40E-02,0,5.48E-02,7.51E-02,0,0.175123362,0,7.85E-03,0,5.31E-02,0,2.81E-02,1.62E-02,0,3.42E-03,0.236818568,0,4.88E-02,0.220394789,2.98E-02,0,2.64E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.933239197,0.913199637,1.178173157,-0.830313519,-0.12915265,0.646966436,0.94913735,-0.199865835,-1.088330724,1.888357198,0.62417164,1.730456151,-1.551065165,-0.566461594,7925.918641,6126.937728,14052.85637,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02409-A,C3L-02409,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3.27E-02,0,0,0,0,1.71E-17,6.17E-20,0,0,2.90E-18,2.31E-17,7.11E-18,1.52E-03,0,2.66E-03,5.09E-02,5.64E-18,0.322699883,0,0.383590369,9.21E-21,1.38E-02,1.48E-03,0,1.84E-18,0,0,0,5.15E-18,0,0,0,8.37E-18,0,2.57E-02,1.28E-18,1.77E-03,0.187915174,0.189685599,2.49E-02,0,7.29E-02,3.27E-02,0,0.159441905,0,0,0,2.36E-02,0,3.64E-02,0,0.271801413,8.48E-03,0.229752271,0,1.25E-02,0,0.105419365,0,2.21E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.9557728,0.548289566,3.41E-04,1.81E-02,-0.345797202,0.519290737,0.860105774,1.302085624,-0.522851349,1.325185321,0.993419738,1.372261072,-0.905048667,1.058334661,6222.880071,5060.837371,11283.71744,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02412-A,C3L-02412,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.215739227,0,0,0,7.75E-18,2.29E-02,3.15E-02,3.75E-03,0,1.45E-02,0,0,5.11E-03,0,1.83E-02,0.131728147,1.69E-02,0.526818772,2.43E-02,0.75770873,0,8.49E-03,3.65E-03,2.12E-03,1.55E-17,1.34E-02,0,4.06E-03,0,2.04E-02,1.28E-18,0,0,1.20E-17,0,0,3.65E-02,0.332106794,0.368611615,0,0,4.10E-02,6.86E-02,0,0.236196429,0,2.53E-03,0,1.36E-02,0,9.36E-02,1.22E-02,0,6.22E-04,0.261489771,0,5.20E-02,0.136986379,3.84E-02,7.78E-03,3.50E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.391845651,1.699940261,-0.116827,-0.776342791,0.14047024,0.94591908,1.106123198,1.513517672,-0.763164399,2.054977063,1.063772836,2.407469148,-1.577275359,-0.439496296,7712.295621,6740.407053,14452.70267,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02468-A,C3L-02468,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,4.40E-02,0,0,1.50E-19,2.01E-19,9.38E-18,0,6.12E-03,6.12E-03,3.20E-19,1.01E-18,1.59E-20,1.22E-02,8.23E-03,7.28E-20,6.92E-02,0,0.641342513,1.25E-02,0.56595538,1.10E-18,4.45E-03,1.46E-03,1.39E-18,0,0,0,0,0,1.09E-02,4.26E-18,0,0,2.44E-02,4.23E-02,5.42E-20,4.08E-03,0.358160588,0.362237931,6.26E-03,0,3.40E-03,6.08E-02,0,0.212626096,0,4.29E-02,0,0,3.38E-02,6.22E-02,2.67E-02,0,6.89E-03,0.208514434,0,8.91E-03,0.327089814,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.140236289,-2.88716034,-0.996508995,-1.369861021,-0.904218543,-2.590928371,-2.028762638,-1.785443699,-1.00904764,1.762117684,-2.0353135,0.610363391,-0.662719021,-1.837209585,6558.709881,3764.485072,10323.19495,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02557-A,C3L-02557,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,7.24E-02,0,0,0,4.43E-19,0,0,1.66E-02,5.37E-02,3.97E-02,0,0,2.43E-02,0,5.30E-03,0.103708401,0,0.689665295,9.42E-03,0.501280037,3.24E-03,5.47E-03,1.43E-02,2.43E-03,0,0,0,1.10E-20,0,5.43E-02,1.78E-17,0,0,0,7.89E-02,1.47E-02,4.31E-02,0.405916254,0.449001326,0,0,4.09E-02,8.16E-02,0,0.293708179,0,1.05E-03,0,6.00E-02,0,7.65E-02,0,2.37E-02,1.33E-02,0.184846121,0,3.90E-02,0.178316573,0,0,7.03E-03,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.151016253,-0.413968182,-0.54664953,-1.436754293,-0.678421068,-0.359791585,-0.688589313,-1.235313253,-1.042289823,1.841018131,-0.756341535,0.531813177,-1.049237752,-0.438256446,6770.994541,5047.086005,11818.08055,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3L-02602-A,C3L-02602,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3.99E-02,0,1.01E-17,0,0,3.78E-18,0,6.85E-03,2.87E-19,4.29E-19,9.01E-18,0,2.69E-02,7.70E-03,1.91E-20,0.168002798,0,0.596503979,2.34E-02,0.658958354,2.17E-18,0,0,3.12E-19,0,2.74E-17,0,1.33E-18,4.09E-18,9.92E-19,0,0,0,7.79E-18,2.69E-02,1.00E-19,2.37E-17,0.39488576,0.39488576,3.08E-02,0,3.81E-03,8.51E-02,0,0.224482124,0,3.42E-03,0,0,0,3.26E-02,1.22E-02,0,9.14E-03,0.218299057,3.49E-04,3.13E-03,0.345976918,0,3.07E-02,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-1.05E-03,-1.691415127,0.425786212,-1.40327774,-0.337226582,-2.017029214,-1.742841389,-1.238365244,-2.713352836,1.463258594,-1.683314273,0.723022501,-1.340393444,-0.795094283,7044.501377,4183.202184,11227.70356,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-00520-A,C3N-00520,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3.25E-02,0,1.82E-20,3.60E-18,8.99E-18,0,9.51E-03,0,0,4.08E-19,7.26E-18,2.38E-18,0,0,3.03E-03,5.37E-03,3.79E-19,0.237411611,2.19E-18,0.197726532,1.12E-19,5.33E-03,3.27E-04,0,2.35E-18,0,8.43E-18,4.82E-19,0,3.53E-03,0,6.48E-03,0,0,2.61E-18,2.95E-03,2.02E-03,0.122308489,0.124328306,2.97E-02,0,0,2.12E-02,0,0.384109239,0,5.95E-02,0,0,0,1.76E-02,4.65E-02,0,3.33E-03,0.207351134,0.108566413,3.42E-02,5.56E-02,3.24E-02,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3.504331272,-0.178460747,-0.242888136,-1.144597295,-1.378702931,-0.104992313,-0.682985158,-0.109862444,-0.900703564,1.569574495,-0.680011241,0.191760782,-1.240880903,1.242117843,5840.467476,3924.20137,9764.668846,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-00859-A,C3N-00859,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.41E-02,2.44E-03,0,5.55E-18,0,2.08E-18,2.28E-02,2.76E-19,3.41E-18,0,9.71E-19,4.00E-03,6.01E-04,1.09E-18,0,0,4.01E-03,1.39E-02,0,1.90E-02,0,1.55E-03,3.72E-04,4.72E-03,6.62E-18,0,2.52E-03,0,8.03E-20,8.13E-03,2.75E-02,3.47E-03,4.18E-18,5.09E-02,5.70E-02,0,7.45E-03,9.41E-03,1.69E-02,0,0,5.48E-02,6.99E-02,0,0.12977877,0,0,3.22E-02,1.28E-03,0,3.68E-02,0,0.185643271,0,0.345265235,0,7.03E-02,0,1.48E-02,3.14E-02,2.78E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.465643345,1.564356068,0.956532568,1.233825599,-0.479072708,0.80443949,-0.785553003,2.230841656,0.570570382,-0.200093478,-0.626182932,-0.287049877,0.97846244,-0.554310802,3038.923669,2644.370039,5683.293707,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-00872-A,C3N-00872,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.177926548,0,0,4.35E-19,0,2.36E-18,4.61E-02,1.59E-02,8.12E-02,7.17E-02,4.59E-03,3.21E-18,1.55E-02,0,1.46E-02,7.39E-02,7.07E-04,0.524513105,3.22E-03,0.437779233,1.83E-02,2.14E-02,2.07E-02,6.91E-03,6.79E-18,1.61E-03,0,1.05E-03,4.56E-19,1.73E-02,8.81E-18,1.46E-03,0,1.82E-17,6.40E-02,0,8.30E-02,0.307491693,0.390445033,2.31E-02,0,5.50E-02,0.149473131,0,0.215750488,0,1.26E-02,0,4.99E-02,0,8.63E-02,0,1.62E-04,4.00E-02,0.207097987,3.59E-04,6.61E-04,1.61E-02,0.132943273,0,1.05E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.377115669,-0.278664379,0.248138947,-0.697537837,0.103318115,-0.869483008,0.51153038,-0.366900549,-1.082567936,1.870398552,0.314260087,1.989293078,-1.233601392,0.555595171,7832.470038,6278.029107,14110.49914,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01344-A,C3N-01344,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.102277861,9.95E-19,0,0,5.65E-19,0,0,0,7.42E-18,0,2.92E-19,7.67E-19,0,2.12E-18,2.19E-20,0,2.71E-18,0,6.62E-19,1.64E-02,3.31E-19,1.11E-02,1.99E-03,0,0,0,1.13E-19,0,5.27E-18,0,2.35E-02,3.71E-03,0,7.21E-02,1.39E-03,1.14E-18,1.15E-17,0,1.15E-17,4.42E-02,0,3.47E-02,1.21E-02,0,0.11682117,0,9.91E-02,0,1.86E-02,0,7.62E-02,0,8.96E-02,4.66E-03,0.200680109,0,0.221806886,2.34E-02,0,0,5.81E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.762226593,0.349684456,-0.396812251,0.137712717,2.940347545,0.233443073,0.117049522,1.38648037,-0.259023813,-0.197161799,0.244119482,-0.30886802,0.267114696,1.068768281,3031.933481,3776.546198,6808.479679,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01525-A,C3N-01525,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.255764541,0,0,1.25E-17,8.83E-18,6.39E-21,3.67E-02,5.06E-03,7.04E-18,7.64E-03,0,2.09E-18,9.16E-03,4.97E-03,9.95E-03,9.82E-02,3.28E-03,0.400281979,9.36E-03,0.622992964,0,1.08E-02,5.67E-03,1.11E-02,1.84E-17,5.31E-02,0,1.02E-02,2.61E-18,2.39E-03,0,0,0,0,0,8.84E-20,5.84E-02,0.262314399,0.320723703,0,0,3.69E-02,0.104719513,0,0.124913255,0,3.22E-02,0,0,0,5.99E-02,0.120359669,0,6.11E-03,0.198008067,0,3.18E-02,0,0.232313931,0,5.28E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2.033936923,2.651293378,0.130626246,-0.888300351,0.121750323,1.882601204,1.358664058,2.030387319,0.629703017,1.968475431,1.398238035,1.877116406,-0.67029527,-0.45024582,7407.103417,7035.151955,14442.25537,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01526-A,C3N-01526,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,5.86E-02,0,0,6.55E-18,1.78E-17,3.39E-02,2.15E-02,1.39E-03,2.48E-18,0,0,0,4.93E-03,1.25E-02,5.72E-04,7.17E-02,0,0.382016661,6.23E-03,0.513474028,0,8.67E-04,3.77E-05,4.35E-03,6.85E-18,0,8.13E-18,0,1.23E-19,2.88E-02,0,0,3.80E-18,1.94E-20,0,0,3.28E-03,0.229137087,0.232417684,1.63E-02,0,6.06E-02,0.100744808,0,0.144147398,0,5.08E-02,0,1.50E-02,0,3.89E-02,7.96E-02,0,1.07E-02,0.174271138,1.76E-02,3.91E-03,0.252950634,3.44E-02,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.198583945,2.092040656,0.754697088,-1.20E-02,-0.595248117,1.231009441,-0.209081688,2.185977047,-0.587437631,1.604530189,-0.158902509,1.487404635,-0.744430111,-0.263525223,6428.034564,4843.35425,11271.38881,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01761-A,C3N-01761,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,5.88E-02,0,1.14E-17,0,7.45E-19,3.44E-18,2.06E-17,6.86E-03,0,5.95E-19,2.40E-17,1.23E-17,1.56E-02,1.12E-02,0,0.185705141,8.74E-18,0.70847269,2.13E-02,0.762720694,0,0,3.99E-03,0,0,7.45E-18,0,1.41E-19,0,2.77E-03,2.69E-17,6.24E-18,0,0,2.75E-02,0,1.14E-17,0.458361628,0.458361628,4.18E-03,0,2.71E-03,9.25E-02,0,0.163461823,0,3.52E-02,0,0,0,6.84E-02,2.17E-02,0,7.19E-03,0.229998202,0,6.93E-03,0.347617108,0,0,2.02E-02,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.86886259,-1.138641717,0.287104266,-0.870810758,-0.666947676,-0.992998113,-0.937120032,-1.07122901,-1.374719452,1.629555029,-0.990817573,0.911391458,-0.68118662,-1.65385842,6960.593486,4245.227399,11205.82088,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01765-A,C3N-01765,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.385709251,6.09E-03,1.56E-02,3.96E-02,3.45E-19,1.13E-17,0.132035182,9.77E-03,0.176715243,0.18409375,8.30E-02,1.09E-20,1.04E-02,1.88E-02,2.46E-02,0.145543369,2.34E-03,0.528764457,3.27E-02,0.747813912,2.22E-02,1.99E-02,2.12E-02,1.58E-02,0,4.84E-02,7.60E-19,0,6.26E-18,1.26E-02,3.22E-02,2.74E-03,1.00E-18,1.24E-17,0.116741345,1.01E-17,0.197389264,0.339351587,0.536740851,2.65E-02,6.61E-03,1.46E-02,0.15082551,0,0.167463768,0,5.28E-02,0,9.98E-02,0,6.23E-03,4.94E-02,0,4.97E-02,0.227961499,4.90E-03,0,0.141955202,0,0,1.41E-03,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.455078744,-0.711693628,0.125925848,-1.369280232,1.206095225,-0.823642015,0.17434572,0.839653304,-0.973497197,1.438362627,8.09E-02,3.074014285,-1.798089975,-0.327055551,8322.649314,8591.06139,16913.7107,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01830-A,C3N-01830,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.206049786,0,0,6.52E-18,9.26E-18,0,3.82E-02,1.10E-03,1.81E-18,2.80E-02,0,0,6.12E-03,1.32E-02,9.32E-03,9.07E-02,0,0.312759043,7.72E-03,0.532419328,1.98E-04,1.29E-02,5.02E-03,2.36E-03,8.98E-18,7.21E-02,0,2.59E-03,2.05E-17,9.62E-03,8.70E-03,1.32E-18,1.08E-18,3.18E-20,1.88E-03,0,5.77E-02,0.20356628,0.261302494,1.37E-02,0,4.02E-02,7.65E-02,0,0.148369365,0,3.88E-02,0,5.63E-02,0,6.91E-02,0.171119695,0,1.79E-02,0.192388542,0,4.98E-02,0.12316596,0,0,2.64E-03,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2.419634611,1.876637456,0.649845366,-0.370041125,0.894831163,1.161911614,0.627432791,2.370514834,0.107487926,1.367786114,0.628101077,1.273370183,-1.308397785,-0.25367492,7151.535829,6874.816632,14026.35246,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01847-A,C3N-01847,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.102885407,0,4.31E-19,0,6.29E-18,9.14E-03,4.00E-02,3.76E-03,0,4.87E-03,0,0,7.63E-03,1.49E-02,6.81E-03,8.98E-02,3.11E-18,0.33842921,1.26E-02,0.562435992,0,4.17E-03,1.68E-03,0,0,2.67E-03,0,4.88E-20,0,0,1.20E-18,0,3.39E-18,2.25E-03,1.05E-03,0,6.32E-03,0.22260115,0.228918247,0,0,5.06E-02,7.79E-02,0,0.231474007,0,1.35E-02,0,1.29E-02,0,3.91E-02,5.89E-02,0,5.99E-03,0.212188281,3.01E-02,3.57E-03,0.263800798,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.285683602,0.724246944,1.154987566,0.23395647,-0.628356107,2.90E-02,-1.317790146,2.580999462,2.71E-02,1.586765752,-1.279775131,1.978789566,-1.242402283,-0.608472878,6624.531234,5123.782744,11748.31398,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01848-A,C3N-01848,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,5.79E-02,0,0,3.32E-20,1.30E-17,1.16E-19,0,2.18E-02,2.55E-02,1.73E-02,1.47E-02,0,2.17E-02,8.33E-03,1.97E-21,0.18631007,0,0.708512811,1.99E-02,0.661234441,0,0,4.60E-03,9.79E-03,0,3.89E-18,0,2.84E-20,0,2.65E-03,1.13E-17,0,1.58E-17,6.08E-19,5.46E-02,2.48E-19,2.36E-02,0.461013316,0.484574865,1.10E-02,0,7.10E-03,0.185612984,0,8.88E-02,0,2.53E-02,0,9.09E-02,0,6.52E-02,0,0,1.02E-02,0.237784526,0,1.70E-04,0.277873347,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.86365618,-1.952859621,-0.794481774,-1.654868168,-0.407088475,-1.811995396,-2.163529808,-0.966593332,-2.124450354,0.894905781,-2.250886349,0.546515439,-0.794140345,-1.299150993,6683.097273,4739.418878,11422.51615,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01875-A,C3N-01875,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.113201071,0,4.20E-18,4.70E-19,0,4.32E-18,1.69E-17,1.88E-02,1.29E-20,1.23E-03,1.23E-17,0,2.07E-02,2.55E-03,0,0.183214677,0,0.711090553,1.47E-02,0.858786829,0,2.02E-19,6.78E-03,3.93E-19,0,1.73E-17,0,0,2.94E-17,2.30E-02,4.73E-18,0,0,0,3.75E-02,0,3.14E-17,0.463558596,0.463558596,6.09E-04,0,1.43E-03,0.130411976,0,0.18997322,0,0,0,9.17E-02,0,5.32E-02,0,0,2.83E-02,0.286766108,2.95E-03,6.83E-03,0.207867461,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-1.881045234,-1.764750125,-0.960213565,-1.473857394,-0.269530666,-1.616907676,-1.224553639,-1.519633052,-1.106528514,1.67168485,-1.460613681,1.8269094,-0.534452195,-1.734938404,7040.933592,4677.219522,11718.15311,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-01876-A,C3N-01876,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,4.60E-02,0,5.77E-18,0,0,0,2.24E-17,2.00E-03,0,0,0,0,1.14E-02,7.48E-03,0,0.203828462,7.68E-18,0.744627412,1.92E-03,0.761638386,1.58E-18,0,8.17E-03,8.26E-20,1.95E-18,0,1.33E-17,0,0,2.07E-19,3.02E-17,0,3.07E-17,0,3.01E-02,1.96E-17,6.23E-18,0.483579107,0.483579107,3.99E-03,0,1.49E-03,7.14E-02,0,0.169175108,0,0,0,0,0,3.54E-02,4.04E-02,0,1.75E-02,0.304348025,4.83E-02,0,0.308025277,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.90297969,-1.859797623,-1.078962703,-1.424993583,-0.531427809,-1.500026591,-1.234465821,-0.936862133,-1.326872345,2.1629105,-1.013113426,0.763534292,-1.391299068,-0.97876193,7368.245386,4074.900599,11443.14599,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02249-A,C3N-02249,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.39E-02,0,0,1.37E-18,0,0,5.66E-18,3.93E-03,0,4.36E-19,1.26E-17,0,8.49E-03,1.49E-03,0,0.245939725,0,0.653338677,2.35E-02,0.58301948,1.59E-18,0,1.69E-03,1.83E-18,0,3.39E-17,0,0,0,2.95E-18,1.02E-17,7.83E-18,0,1.24E-18,2.21E-02,3.09E-19,2.49E-17,0.461550321,0.461550321,7.48E-02,0,2.44E-03,3.39E-02,0,0.20711663,0,0,0,4.48E-02,0,8.92E-02,5.42E-03,0,0,0.183739041,0,1.73E-02,0.341239947,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-3.30E-02,-0.713369181,0.827332684,-1.171545177,-0.566017522,-0.554248023,-0.507234302,-1.22575424,-2.048022745,1.602198941,-0.517442781,-0.10510676,-0.907217907,-1.378538017,6705.846898,3992.943045,10698.78994,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02253-A,C3N-02253,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,7.76E-03,0,0,7.31E-19,0,1.22E-17,0,6.64E-03,9.94E-18,3.80E-19,2.47E-17,1.70E-17,2.70E-02,1.00E-19,9.56E-20,0.215494628,1.18E-18,0.714824282,3.35E-03,0.813385121,5.30E-18,7.62E-19,1.79E-19,9.05E-18,0,4.48E-17,0,3.75E-19,0,0,0,0,1.05E-17,0,1.87E-02,6.55E-18,4.71E-17,0.477455721,0.477455721,0.111946205,0,1.30E-02,9.60E-02,0,0.171093134,0,2.69E-02,0,0,0,4.75E-02,3.85E-02,0,2.51E-03,0.25069308,0,6.95E-03,0.209816826,0,2.51E-02,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.437915715,-1.367327175,-8.48E-02,-1.390806733,-0.576586175,-0.822444413,-1.62499903,-1.579552741,-1.993526397,1.431854478,-1.623004552,-0.600977951,-1.385853161,-0.922023925,6443.253312,3497.954021,9941.207333,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+C3N-02436-A,C3N-02436,No,NA,NA,NA,NA,Adjacent_normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2.82E-02,0,3.68E-18,7.18E-19,5.58E-19,2.23E-19,7.12E-18,6.03E-19,4.64E-19,1.58E-18,7.00E-18,0,1.39E-02,8.95E-03,1.65E-03,0.12703206,1.84E-18,0.632200887,1.83E-02,0.434250372,0,6.18E-03,1.97E-04,3.96E-19,1.11E-18,2.64E-19,0,0,0,1.47E-02,0,0,0,8.13E-03,5.01E-02,0,1.10E-03,0.38571288,0.386810315,2.31E-02,0,0,1.30E-02,0,0.12179712,0,2.76E-02,0,0,0,5.82E-02,0.11396485,0,0,0.282291703,2.06E-02,2.12E-02,0.318086315,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.368084211,-0.723376975,1.694320365,-1.347477111,-0.770164913,-0.969217066,-1.64874165,-1.324307168,-0.970939319,2.305155839,-1.54215058,-0.14394336,-1.25664674,-1.143129981,7156.937295,4065.450121,11222.38742,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX019,NX019,No,b4,13,128N,CPT0203740002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX020,NX020,No,b2,6,129C,CPT0234180002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX021,NX021,No,b1,1,130N,CPT0234220002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX022,NX022,No,b3,10,129N,CPT0234300004,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX023,NX023,No,b3,11,127C,CPT0230070003,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX024,NX024,No,b1,1,131N,CPT0230290003,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX025,NX025,No,b3,9,131N,CPT0230310002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX026,NX026,No,b2,8,129N,CPT0230370002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX027,NX027,No,b1,3,131C,CPT0188640005,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX028,NX028,No,b3,12,130C,CPT0186260002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX029,NX029,No,b4,14,129C,CPT0231200003,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX030,NX030,No,b2,5,130N,CPT0231200002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX031,NX031,No,b2,7,128N,CPT0292690003,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX032,NX032,No,b1,2,130N,CPT0313410002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX033,NX033,No,b4,15,127C,CPT0313550002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX034,NX034,No,b1,4,129N,CPT0313400002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX035,NX035,No,b1,3,129C,CPT0313420003,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX036,NX036,No,b2,7,129N,CPT0313490002,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX037,NX037,No,b4,16,130C,CPT0313420007,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+NX038,NX038,No,b2,6,131C,CPT0313420004,Enriched_Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/examples/datasets/Dou-ucec-discovery.csv b/examples/datasets/Dou-ucec-discovery.csv
new file mode 100644
index 00000000..c99b6b34
--- /dev/null
+++ b/examples/datasets/Dou-ucec-discovery.csv
@@ -0,0 +1,154 @@
+idx,Proteomics_Participant_ID,Case_excluded,Proteomics_TMT_batch,Proteomics_TMT_plex,Proteomics_TMT_channel,Proteomics_Parent_Sample_IDs,Proteomics_Aliquot_ID,Proteomics_Tumor_Normal,Proteomics_OCT,Country,Histologic_Grade_FIGO,Myometrial_invasion_Specify,Histologic_type,Treatment_naive,Tumor_purity,Path_Stage_Primary_Tumor-pT,Path_Stage_Reg_Lymph_Nodes-pN,Clin_Stage_Dist_Mets-cM,Path_Stage_Dist_Mets-pM,tumor_Stage-Pathological,FIGO_stage,LVSI,BMI,Age,Diabetes,Race,Ethnicity,Gender,Tumor_Site,Tumor_Site_Other,Tumor_Focality,Tumor_Size_cm,Estrogen_Receptor,Estrogen_Receptor_%,Progesterone_Receptor,Progesterone_Receptor_%,MLH1,MLH2,MSH6,PMS2,p53,Other_IHC_specify,MLH1_Promoter_Hypermethylation,Num_full_term_pregnancies,EPIC_Bcells,EPIC_CAFs,EPIC_CD4_Tcells,EPIC_CD8_Tcells,EPIC_Endothelial,EPIC_Macrophages,EPIC_NKcells,EPIC_otherCells,CIBERSORT_B _cells _naive,CIBERSORT_B _cells _memory,CIBERSORT_Plasma _cells,CIBERSORT_T _cells _CD8,CIBERSORT_T _cells _CD4 _naive,CIBERSORT_T _cells _CD4 _memory _resting,CIBERSORT_T _cells _CD4 _memory _activated,CIBERSORT_T _cells _follicular _helper,CIBERSORT_T _cells _regulatory _(Tregs),CIBERSORT_T _cells _gamma _delta,CIBERSORT_NK _cells _resting,CIBERSORT_NK _cells _activated,CIBERSORT_Monocytes,CIBERSORT_Macrophages _M0,CIBERSORT_Macrophages _M1,CIBERSORT_Macrophages _M2,CIBERSORT_Dendritic _cells _resting,CIBERSORT_Dendritic _cells _activated,CIBERSORT_Mast _cells _resting,CIBERSORT_Mast _cells _activated,CIBERSORT_Eosinophils,CIBERSORT_Neutrophils,CIBERSORT_Absolute 
_score,ESTIMATE_StromalScore,ESTIMATE_ImmuneScore,ESTIMATE_ESTIMATEScore,Stemness_score,ER_ESR1,PR_PGR,Pathway_activity_EGFR,Pathway_activity_Hypoxia,Pathway_activity_JAK.STAT,Pathway_activity_MAPK,Pathway_activity_NFkB,Pathway_activity_PI3K,Pathway_activity_TGFb,Pathway_activity_TNFa,Pathway_activity_Trail,Pathway_activity_VEGF,Pathway_activity_p53,TP53_ATM,TP53_CHEK2,TP53_MDM4,TP53_RPS6KA3,TP53_TP53,TP53_pathway,PI3K_AKT1,PI3K_AKT2,PI3K_AKT3,PI3K_DEPDC5,PI3K_DEPTOR,PI3K_INPP4B,PI3K_MAPKAP1,PI3K_MLST8,PI3K_MTOR,PI3K_NPRL2,PI3K_NPRL3,PI3K_PDK1,PI3K_PIK3CA,PI3K_PIK3CB,PI3K_PIK3R1,PI3K_PIK3R2,PI3K_PPP2R1A,PI3K_PTEN,PI3K_RHEB,PI3K_RICTOR,PI3K_RPS6,PI3K_RPS6KB1,PI3K_RPTOR,PI3K_STK11,PI3K_TSC1,PI3K_TSC2,PI3K_pathway,HRD_BRCA1,HRD_BRCA2,HRD_BRCA1_or_BRCA2,CNV_DEL,CNV_AMP,CNV_class,CNV_idx,CNV_1q_DEL,CNV_3q_DEL,CNV_4q_DEL,CNV_1q_AMP,CNV_3q_AMP,CNV_4q_AMP,Purity_Immune,Purity_Cancer,Purity_Stroma,MSI_status,POLE_subtype,JAK1_MS_INDEL,JAK1_Mutation,Log2_variant_per_Mbp,Log2_SNP_per_Mbp,Log2_INDEL_per_Mbp,Log2_variant_total,Log2_SNP_total,Log2_INDEL_total,Genomics_subtype,Mutation_signature_C>A,Mutation_signature_C>G,Mutation_signature_C>T,Mutation_signature_T>C,Mutation_signature_T>A,Mutation_signature_T>G,WXS_normal_sample_type,WXS_normal_filename,WXS_normal_UUID,WXS_tumor_sample_type,WXS_tumor_filename,WXS_tumor_UUID,WGS_normal_sample_type,WGS_normal_UUID,WGS_tumor_sample_type,WGS_tumor_UUID,RNAseq_R1_sample_type,RNAseq_R1_filename,RNAseq_R1_UUID,RNAseq_R2_sample_type,RNAseq_R2_filename,RNAseq_R2_UUID,miRNAseq_sample_type,miRNAseq_UUID,Methylation_available,Methylation_quality
+S001,C3L-00006,No,2,5,128N,C3L-00006-01,CPT0001460012,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,1,38.88,64,No,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Unifocal,2.9,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,1,0.004595748,0.045125791,0.075643165,0.034381469,0.047247678,0.002878929,6.39E-10,0.79012722,0.010443511,0,0.017867521,0.036816257,0,0.088247615,0,0.035408717,0,0,0,0.020942068,0,0.024896194,0.047057934,0.072523873,0,0.00612096,0,0,0,0.00967535,0.37,2821.088429,4885.608881,7706.69731,0.767043727,0.769,0.253,-1.17,0.09,0.93,-0.7,0.04,1.67,-0.75,-0.01,-1.09,1.14,-0.67,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,0,1.43E-05,0.038877665,CNV_LOW,1.292639933,-0.016886191,-0.023339852,-0.023494917,0.005761569,0.009796749,0.014250962,0.060494906,0.730880164,0.208624931,MSI-H,No,WT,WT,5.090218957,5.014858245,1.450879273,10.06204614,9.984418459,5.832890014,MSI-H,8.300395257,1.482213439,72.52964427,14.42687747,1.383399209,1.877470356,Blood_normal,92b5e534-6cb0-43eb-8147-ce7d18526f5e_gdc_realn.bam,53dbb4d6-ce9d-4145-a33a-0c4d67e655df,Tumor,d9975c5f-288d-417d-bdb3-f490d9a36401_gdc_realn.bam,96c07fe5-d530-4c9b-92ce-49408feacb2c,Blood_normal,9f29ebe1-de5d-47a8-a54d-d1e8441409c6,Tumor,457f2c4d-ddf3-416e-bb50-b112eede02d5,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_TAGCTT_S17_L005_R1_001.fastq.gz,8a1efc47-1c29-417f-a425-cdbd09565dcb,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_TAGCTT_S17_L005_R2_001.fastq.gz,8c3fe9b7-7acd-4867-8d9c-a8e5d1516eda,Tumor,37bcba98-1094-459e-83ae-c23a602416fb,YES,PASS
+S002,C3L-00008,No,4,16,130N,C3L-00008-01,CPT0001300009,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage IV,IA,0,39.76,58,No,White,Not-Hispanic or Latino,Female,Posterior endometrium,NA,Unifocal,3.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Cannot be determined,1,0.005666428,0.056677626,0.100222966,0.041562969,0.056081262,0.001627258,4.15E-09,0.738161488,0.004411131,0,0.00176099,0.002130758,0,0.026435753,0,0.014944149,0,0,0,0.004934448,0.00385161,0.016040522,0.00394293,0.027211297,0.010407182,0.007160341,0,0.015087431,0.011681456,0,0.15,2714.248104,3632.199987,6346.448091,0.752796065,-0.302,-0.0242,0.17,-0.32,-0.91,0.65,-0.54,2.41,-0.87,-0.07,-1.6,2.14,-0.53,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,9.31E-06,1.17E-05,CNV_LOW,0.118480401,0,-0.002698584,-0.01112554,0.000978501,0,0,0.082629765,0.715223373,0.202146861,MSI-H,No,WT,WT,3.941606385,3.451287679,2.440219926,8.861086906,8.330916878,7.169925001,MSI-H,14.64174455,2.803738318,64.48598131,15.26479751,0.934579439,1.869158879,Blood_normal,243bfb3c-d06b-4de5-a6c3-7fa7e2c5fb74_gdc_realn.bam,90257622-6bcc-43e7-a04f-f7397975d038,Tumor,f6924a26-a14f-45a3-b4bd-7a4592d34065_gdc_realn.bam,4501f315-bf8f-4611-821a-797ef180e25b,Blood_normal,846bf455-89b4-4840-b113-e529ffa13277,Tumor,1c0e0f84-4caf-4493-9b2f-8f5f9ef9231b,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_GGCTAC_S22_L007_R1_001.fastq.gz,555725e8-cba5-4676-9b0a-80100cbf9f47,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_GGCTAC_S22_L007_R2_001.fastq.gz,15235b12-b67a-4678-acc4-ed03d642bd5e,Tumor,492b50d8-ec35-46e7-a65d-06512aaee394,YES,PASS
+S003,C3L-00032,No,1,2,131,C3L-00032-01,CPT0001420009,Tumor,No,United States,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,51.19,50,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,4.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,4 or more,0.006418323,0.067375618,0.127155905,0.031354942,0.050682921,0.004971567,2.10E-10,0.712040724,0.039852399,0,0.050305497,0.043745478,0,0.217047308,0,0.001314347,0,0,0.018425297,0,0,0.122233997,0.020499422,0.049193367,0.021836951,0.039683645,0,0.033824616,0.027104312,0.094933366,0.78,3856.501277,6602.912323,10459.4136,0.570730195,-0.1,0.0767,-0.02,0.24,0.4,-0.25,1.25,0.16,-0.61,1.63,1.67,-0.9,0.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,5.93E-05,0.000243818,CNV_LOW,0.174427719,-0.001375124,-0.003122768,-0.005361581,0.003805798,0.003557554,0.002393507,0.260963289,0.365434419,0.373602292,MSS,No,WT,WT,1.142367912,0.970671335,0.319245674,5.321928095,5,3.169925001,CNV_low,16.12903226,3.225806452,70.96774194,3.225806452,3.225806452,3.225806452,Blood_normal,97e153c1-7be4-4d7b-9d30-9545b56e6ddb_gdc_realn.bam,75a4369c-5247-427d-aabc-3157d08f2d8f,Tumor,9de43644-2816-4fac-8e1e-b49b18b2707b_gdc_realn.bam,0ec56b8f-f64a-401e-be41-d873fa31776b,Blood_normal,a16b07d8-46c1-4fd9-8204-4f866aacfbec,Tumor,b93693c0-b88d-43d6-9b8b-97c8269982cc,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_GTCCGC_S18_L005_R1_001.fastq.gz,9ae968f3-691d-4db3-9977-1ab3e5af9085,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_GTCCGC_S18_L005_R2_001.fastq.gz,423b6b09-02aa-4f47-9241-f75c1dad1161,Tumor,1794ff56-db2d-4d1a-8758-cab7fe3d98c1,YES,PASS
+S004,C3L-00084,Yes,3,11,129N,C3L-00084-01,CPT0000820012,Tumor,No,NA,NA,NA,Carcinosarcoma,YES,Normal,NA,NA,NA,NA,NA,NA,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-2.25,-1.63,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,NA,NA,NA,NA,-0.004758788,-0.011853715,-0.209865909,0.094452817,0.349513405,0.009282372,NA,NA,NA,MSS,No,WT,WT,1.835786443,1.732044006,0.319245674,6.392317423,6.247927513,3.169925001,NA,16,28,29.33333333,17.33333333,5.333333333,4,Blood_normal,b0c06553-0088-41fd-a0de-fecc9bb6574a_gdc_realn.bam,085f713e-b95c-4b59-ae60-22b70e8f499d,Tumor,e58981f9-0125-452a-9a6a-ad0c3116ebdd_gdc_realn.bam,d09a2cf2-6b59-43ca-aeea-494f099f879e,Blood_normal,edc0e018-cb28-4bf3-b1ff-39490924e401,Tumor,f6eac4de-7732-414b-bcd2-de1bab0a00a3,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_L002_R1_001.fastq.gz,b0a7cdf2-2ad8-4442-91b0-548ea4975554,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_L002_R2_001.fastq.gz,c83987a5-1c13-4af4-b46c-218fe5f60c34,NA,NA,YES,PASS
+S005,C3L-00090,No,3,12,129C,C3L-00090-01,CPT0001140003,Tumor,No,United States,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,32.69,75,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,3.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,4 or more,0.005577885,0.060593928,0.112090146,0.044365306,0.058249827,0.003754992,9.84E-10,0.715367914,0,0.019078676,0.007199854,0.047075193,0,0.06169803,0,0,0.022737675,0.002598418,0,0.014684549,0.002542568,0,0.018807855,0.122544366,0,0.0041831,0.065981557,0,9.00E-04,0,0.39,3012.556593,4462.910274,7475.466867,0.650749915,0.129,0.0696,-1.14,0.7,0.4,-1.85,-0.73,-0.39,-0.53,-1.06,-0.07,0.39,0.15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.002260223,0.098113801,CNV_LOW,1.750745835,-0.043811447,-0.073670939,-0.050230318,0.00313913,0,0.000109284,0.076005374,0.571299524,0.352695102,MSS,No,WT,WT,1.349358266,1.258934249,0.207641466,5.672425342,5.523561956,2.584962501,CNV_low,17.77777778,8.888888889,62.22222222,8.888888889,2.222222222,0,Blood_normal,7959d3f4-80cb-45f5-99ea-248ac2059877_gdc_realn.bam,d74368b7-1924-4c44-8079-1858f7ebf85a,Tumor,cfc5140e-1c15-4b9f-83e0-5613e427870e_gdc_realn.bam,055e85d1-c36d-4fbd-b578-4fd23e81b9e5,Blood_normal,833145ed-db94-4cb2-8429-df062a71bd8a,Tumor,544edc00-ac31-47cf-9489-1a797eb22e53,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_GAGTGG_S10_L003_R1_001.fastq.gz,8ce5618d-9ff6-40f9-aeea-8d8e1633ae38,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_GAGTGG_S10_L003_R2_001.fastq.gz,06d3fd4a-a623-4146-8500-4f1f17235253,Tumor,a6524c2d-d7dd-4629-980e-b45dbdc92c49,YES,PASS
+S006,C3L-00098,No,4,14,129N,C3L-00098-02,CPT0000980012,Tumor,No,United States,NA,under 50 %,Serous,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,NA,20.28,63,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,6,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Normal,NA,Cannot be determined,2,0.003088213,0.014328871,0.063531838,0.031658353,0.063943075,0.007304327,8.69E-09,0.816145315,0.005773262,0,0.027627049,0.050190166,0,0.015101252,0.026119631,0.011140644,0,0.012162159,0,0.02318389,0,0.054263627,0.064099852,0.140338468,0,0,0,0,0,0,0.43,1717.74787,6545.943147,8263.691016,1,-2.18,-2.8,-0.98,-0.61,1.85,-2.14,0.89,-0.54,-2.51,-0.02,0.96,2.1,-1.98,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.330703347,0.24420711,CNV_HIGH,7.062483444,-0.01420399,-0.000856205,-0.381597562,0.131950499,0.268048184,0.064997832,0.26273407,0.623745319,0.113520611,MSS,No,WT,WT,1.634715536,1.530342916,0.282995148,6.108524457,5.95419631,3,CNV_high,9.836065574,13.1147541,62.29508197,3.278688525,8.196721311,3.278688525,Blood_normal,0f46b9b0-6bff-4156-b505-669d72cc9428_gdc_realn.bam,b58bdcce-b807-4958-814d-3576dee3ae0c,Tumor,40c0a0d3-bdfd-4fa1-9aaa-9e8c747cc53c_gdc_realn.bam,bdde9d09-6a54-46d9-985b-aaf95799f6f9,Blood_normal,385cdfe0-fbae-486f-b333-4a88d1209afb,Tumor,a64c4b72-0517-431f-8128-3a89ad7b387c,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_TTAGGC_S8_L002_R1_001.fastq.gz,31252ba9-e052-4b77-809a-f936379ae00c,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_TTAGGC_S8_L002_R2_001.fastq.gz,23be22ae-de50-4d74-a7c0-c890adbc662a,NA,NA,YES,PASS
+S007,C3L-00136,No,4,16,129C,C3L-00136-03,CPT0000730011,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,55.67,50,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,4.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.001629109,0.064998151,0.174134279,0.006508822,0.114858814,0.003170654,9.76E-10,0.634700169,0.001059623,0,0.012292451,0.089036423,0,0.218447078,0,0.014378883,0,0,0.084425262,0.014546171,0.041004139,0.027346769,0.012830509,0.145650337,0.00817583,0.012429489,0.21207275,0,0.006304285,0,0.9,3414.419984,5622.138932,9036.558917,0.385199529,-0.892,-0.983,1.34,-1.18,0.08,1.32,-0.24,1.04,0.78,-0.27,0.04,-0.76,0.56,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018445972,0.219721966,CNV_LOW,3.206485284,-0.00468552,-0.110063016,-0.119590154,0.588337381,0,0,0.189781135,0.554177035,0.25604183,MSS,No,WT,WT,1.182279033,1.101321187,0.168435383,5.392317423,5.247927513,2.321928095,CNV_low,16.21621622,13.51351351,51.35135135,8.108108108,8.108108108,2.702702703,Blood_normal,2a17380f-bcb1-4de3-8934-af1d4b914892_gdc_realn.bam,27465b6e-6b30-429d-9e0b-cac09c0a8f91,Tumor,0da07c96-e95e-45df-b12f-8c745696f271_gdc_realn.bam,5fc17a22-c109-4f25-8902-a7d9b7642915,Blood_normal,df7dc177-6120-49ae-9046-fffbeed04583,Tumor,9cb2d2f8-a700-4dbf-bc69-cb34274209b9,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTCCGC_S10_L003_R1_001.fastq.gz,df0e2942-c702-4135-81a0-fbec4439d753,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTCCGC_S10_L003_R2_001.fastq.gz,4e1ad404-4646-4828-91b9-e3c35a4ce505,NA,NA,YES,PASS
+S008,C3L-00137,No,4,15,130N,C3L-00137-02,CPT0002010011,Tumor,No,Other_specify,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,1,25.68,60,No,White,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior,Unifocal,5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,2,0.004052783,0.09190801,0.07002453,0.042667198,0.056644142,0.001135568,8.13E-09,0.73356776,0.052107498,0,0.006136476,0.01781162,0,0.096159419,0,0.058437763,0,0,0,0.027957379,0.019246682,0,0.006238274,0.066621604,0,0.015480802,0,0.049418849,0.024383635,0,0.44,2309.012756,2928.488048,5237.500804,0.54271797,1.06,-0.745,0.16,0.41,-0.66,-0.28,-0.26,0.42,1.01,0.18,-2,0.58,-1.05,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.012181118,0,CNV_LOW,0.610544403,-0.000726575,-0.007950328,-0.005928216,0.009886273,0.013221618,0.019164116,2.27E-18,0.642431399,0.357568601,MSS,No,WT,WT,1.349358266,1.2211157,0.282995148,5.672425342,5.459431619,3,CNV_low,27.90697674,11.62790698,48.8372093,4.651162791,2.325581395,4.651162791,Blood_normal,552d7534-1fd1-490b-8751-9dff1bf55569_gdc_realn.bam,d5a11391-8b84-4be2-8a8e-62d1e646b786,Tumor,6b8b5928-258f-4f93-9e52-516db4562c2d_gdc_realn.bam,24d0c8fa-2bb4-465b-96f7-680566a44c2f,Blood_normal,3183b3b5-5fdf-4f74-928e-7493bd04c1eb,Tumor,be294530-6817-4729-b01c-e2845ddabf79,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTGAAA_S12_L003_R1_001.fastq.gz,8fcdd6a1-a7c7-41b5-8b44-e41f2237b236,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTGAAA_S12_L003_R2_001.fastq.gz,2bea607d-6eb2-4583-90d7-7823a3d8a572,NA,NA,YES,PASS
+S009,C3L-00139,No,3,11,130N,C3L-00139-01,CPT0001850012,Tumor,No,United States,NA,50 % or more,Serous,YES,Normal,pT3a (FIGO IIIA),pNX,cM0,Staging Incomplete,Stage III,IIIA,1,21.57,83,No,White,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior,Unifocal,4,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Normal,NA,Cannot be determined,4 or more,0.003577571,0.067769323,0.065130682,0.032434667,0.067883558,0.001420519,4.04E-09,0.761783675,0,0,0.00615282,0.017771978,0,0.101975277,0,0.060458172,0.016090453,0.021748457,0,0.010194706,0.012905363,0.074245364,0.053001228,0.035774299,0,0.009007116,0.099926731,0,0.010748036,0,0.53,2515.79784,2838.299899,5354.097739,0.691597084,-0.997,-1.72,-0.59,-1.37,0.33,-0.19,-1.09,0.36,0.55,-1.18,-1.55,-1.16,-1.6,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0.331737971,0.274666466,CNV_HIGH,5.846872142,0,-0.000207719,-0.485607019,0.335791813,0.405527663,0,0.074969712,0.487728614,0.437301675,MSS,No,WT,WT,1.691133088,1.634715536,0.168435383,6.189824559,6.108524457,2.321928095,CNV_high,7.352941176,33.82352941,36.76470588,8.823529412,4.411764706,8.823529412,Blood_normal,f72de443-e927-4cbf-a81d-3c906af657db_gdc_realn.bam,3e395930-3001-49bf-95ca-7c96e0204b57,Tumor,41bf607b-596b-4c43-9cd5-f245cf6e15fe_gdc_realn.bam,fb433f9d-bb8a-41e3-a5a0-997b16abd0fe,Blood_normal,eaab07a2-6162-4605-a1ba-8ff3a0bebb60,Tumor,e8a2f8dd-5e48-4ebe-a47f-b933e330fd60,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_CAGATC_S1_L001_R1_001.fastq.gz,7785d5a1-a60d-41f9-86f3-e4ebc100704c,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_CAGATC_S1_L001_R2_001.fastq.gz,90ced367-0342-4739-93b2-4b1a4af800c4,Tumor,a02b2784-9e7f-41b1-8e53-707ae4371c45,YES,PASS
+S010,C3L-00143,No,4,14,130C,C3L-00143-01,CPT0001910016,Tumor,No,United States,FIGO grade 1,Not identified,Endometrioid,YES,Normal,pT1 (FIGO I),pN0,cM0,Staging Incomplete,Stage I,IA,0,34.26,69,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,5.2,Cannot be determined,NA,Cannot be determined,NA,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Cannot be determined,2,0.004179295,0.075393304,0.175829124,0.034775754,0.044993308,0.009247814,3.74E-07,0.655581026,0,0,0.034349671,0.305302969,0,0.257694874,0.002504556,0.10551234,0,0.030562897,0,0.017594418,0,0.079259476,0.091911576,0.352506586,0.061026835,0,0.021773803,0,0,0,1.36,4181.251111,8463.786011,12645.03712,0.614413457,-0.729,-1.79,-1.35,-1.4,0.97,-0.4,0.58,-0.88,-0.66,0.51,1.19,0.52,-1.39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0.000232878,0.0418212,CNV_LOW,0.825469308,-0.000310868,-0.023866202,-0.025598317,0.554316666,0,0,0.314749855,0.516615598,0.168634547,MSI-H,No,WT,WT,4.362228844,3.961812528,2.581235568,9.306061689,8.882643049,7.339850003,MSI-H,21.86836518,1.6985138,42.46284501,22.71762208,6.157112527,5.095541401,Blood_normal,c043b586-b3c2-4a81-a4f3-ccc22a2ec00d_gdc_realn.bam,937a94b6-f52b-4bd0-a1e5-32b1d98e41ca,Tumor,791db96f-984d-4482-bd0c-5691bef0b600_gdc_realn.bam,b44bbc36-16b3-431f-878e-c2f9110f580e,Blood_normal,d4c330b5-d7f2-4f29-9002-e10479caa5ad,Tumor,699ff6b6-296d-4897-94c8-13d90dce5824,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ACTTGA_S4_L001_R1_001.fastq.gz,6412838b-2f70-4b14-a6ee-3c7baca09fb0,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ACTTGA_S4_L001_R2_001.fastq.gz,5d0a26e0-2739-4f38-9350-c685b44911d3,Tumor,872be4b7-1735-48a6-a3a2-7541ec65ea87,YES,PASS
+S011,C3L-00145,No,3,9,131,C3L-00145-02,CPT0002630003,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,Staging Incomplete,Staging Incomplete,Stage I,IA,0,36.57,59,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,4.7,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.004218625,0.159679699,0.096748688,0.042677677,0.110137007,0.003454382,4.99E-09,0.583083916,0.005885269,0,6.00E-04,0.016694075,0,0.215617674,0,0.072989693,0,0,0,0.060977392,0.045689566,0.063456552,0.023867009,0.122295409,0.033059137,0,0.042382678,0,0.010365772,0.006120479,0.72,4669.852221,5269.577209,9939.42943,0.270592352,0.0794,0.771,0.43,-0.3,-0.66,0.26,-0.02,-0.02,1.46,0.47,-0.61,-0.13,0.95,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.00015084,CNV_LOW,0.23259562,0,-0.005465167,-0.006784099,0.032914497,0,0,0.100674478,0.3855902,0.513735322,MSS,No,WT,WT,1.182279033,1.037474705,0.282995148,5.392317423,5.129283017,3,CNV_low,17.64705882,14.70588235,52.94117647,8.823529412,5.882352941,0,Blood_normal,2a8535dc-eb5a-4517-a7be-6d52d87a4233_gdc_realn.bam,a4663b3f-7399-4fb6-b11b-536983006cac,Tumor,24d1cc36-8239-4162-a2c2-6448607a4620_gdc_realn.bam,aabae279-c40a-4c09-92bc-e2f9d662eed7,Blood_normal,f0c2d7a9-4236-4c49-84bc-2c02169339dd,Tumor,fe66b253-513c-4b4d-a0fc-abd808170301,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GAGTGG_S19_L005_R1_001.fastq.gz,1f5e3577-dbc5-46f7-8615-86887337a97e,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GAGTGG_S19_L005_R2_001.fastq.gz,1c7f3fd1-e321-45b0-8e70-7ecad29c0558,Tumor,60e34478-d9d3-41ae-bf52-a328951196ad,YES,PASS
+S012,C3L-00156,No,4,15,129N,C3L-00156-02,CPT0002080011,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,27.83,56,Yes,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Unifocal,2.2,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,2,0.005791063,0.028713248,0.122822159,0.035670588,0.047452551,0.00692925,5.28E-09,0.752621136,0,0.015477154,0.050717552,4.00E-04,0,0.180804762,0,0.010317116,0.011980289,0,0.033211036,0.014532283,3.00E-04,0.128991522,0.006664382,0.151204409,0,0.022078706,0,0.018873801,0,0.024388609,0.67,2304.797045,5850.431589,8155.228634,0.665726073,0.75,-0.422,-1.11,0.72,1.02,-1.36,0,1.46,-1.41,0.22,0.77,1.05,0.47,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,0,1,0.0006768,0,CNV_LOW,0.103538746,0,0,-0.003059389,0.002757972,0.002000249,0,0.147902919,0.694163579,0.157933501,MSS,Yes,WT,Missense_Mutation,5.612520487,5.60519854,0.319245674,10.59712143,10.58965115,3.169925001,POLE,23.24675325,1.168831169,51.94805195,8.896103896,0.454545455,14.28571429,Blood_normal,ee8e3dc1-6ec7-44fe-9e3e-a7455e9920ac_gdc_realn.bam,a402cc2d-41ac-4d0a-ac99-3a014754fd49,Tumor,ed2b6a93-896d-4fc9-8e2b-00821b00fe58_gdc_realn.bam,799c73ad-e7de-45e6-9548-ac30072150e7,Blood_normal,75786b70-9e4a-42a8-892a-29812f7b0e20,Tumor,b2cf2d6b-1638-4d6b-8df3-8ebe30121041,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTGGCC_S16_L004_R1_001.fastq.gz,e1abab26-3582-4e7c-bbea-7cadbd0e36e5,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTGGCC_S16_L004_R2_001.fastq.gz,b92610bf-9bcd-4f26-948d-448f963586de,NA,NA,YES,PASS
+S013,C3L-00157,Yes,4,13,130C,C3L-00157-02,CPT0002560003,Tumor,No,United States,NA,under 50 %,Clear cell,YES,Normal,pT3a (FIGO IIIA),pN2 (FIGO IIIC2),cM0,Staging Incomplete,Stage III,IIIC2,1,34.84,60,No,White,Hispanic or Latino,Female,Anterior endometrium,NA,Unifocal,4.2,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.004931836,0.008916274,0.097631079,0.02297888,0.089019671,0.005064803,1.52E-10,0.771457457,0,0,0.021917773,0.002189731,0,0.027412253,0,0.017302047,0.001802197,0,0,0.016747742,0.030421378,0.00561744,0.009263415,0.080331073,0,0.05295342,0,0.134807458,0,0.069234075,0.47,2212.345792,6285.433874,8497.779666,NA,-1.63,-2.86,0.38,1.35,0.94,-1.85,1.43,1.25,-1.62,1.67,-0.47,0.13,-1.13,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.469507242,0.175301288,CNV_HIGH,4.417577316,-0.036731633,-0.007310833,-0.274529469,0.218618281,0.082265915,0.005930261,0.455792253,0.362982267,0.18122548,MSS,No,WT,WT,1.121990524,0.993284573,0.245810227,5.285402219,5.044394119,2.807354922,CNV_high,31.25,15.625,40.625,6.25,3.125,3.125,Blood_normal,8e4d5871-709e-49fa-aa5c-ab5e9060fca3_gdc_realn.bam,6b7c566b-4693-4ea8-979c-26fb83949551,Tumor,6fdfe226-a6b6-4836-a287-4b8b6e167e07_gdc_realn.bam,fc36eee8-07a4-4ff9-9a4c-5dab053b833b,Blood_normal,4679e568-2b87-4f05-a67c-9d52f0143e05,Tumor,a0a89248-1fe0-4b60-aaf3-49e1e971923d,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CGTACG_S17_L005_R1_001.fastq.gz,ea887db4-6dcb-4e75-b57f-5a0211fb4ba6,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CGTACG_S17_L005_R2_001.fastq.gz,9dd491c3-794c-4f2d-8010-512c90360b1b,NA,NA,YES,PASS
+S014,C3L-00161,No,4,13,131,"C3L-00161-01,C3L-00161-02",CPT0070380004,Tumor,No,United States,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,Staging Incomplete,No pathologic evidence of distant metastasis,Stage I,IB,1,68.39,46,No, White,Hispanic or Latino,Female,"Other, specify",Anterior and Posterior Endometrium,Unifocal,7,Cannot be determined,NA,Cannot be determined,NA,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Absent,2,0.005614163,0.097078048,0.113625405,0.034682381,0.105438711,0.002562586,3.03E-09,0.640998702,0,0,0.029774138,0.23977877,0,0.072129432,0,0.077566594,0.026782261,0,0.067142743,0,0.021205627,0.062962959,0.056281696,0.117348345,0.012365358,0,0.106293289,0,0,0.020368787,0.91,2869.70182,5255.761765,8125.463586,0.51938048,-0.995,-2.4,2.12,0.42,-0.64,1.83,0.21,-0.04,0.12,-0.27,-0.02,0.35,1.2,0,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.017465096,CNV_LOW,0.175460897,-0.00100262,-0.005212673,-0.005595477,0,0,2.79E-06,0.142389668,0.643834137,0.213776195,MSI-H,No,MS_indel,Frame_Shift_Del,4.875737466,4.335929558,3.34540868,9.840777924,9.278449458,8.214319121,MSI-H,15.64516129,1.612903226,61.4516129,17.25806452,2.580645161,1.451612903,Blood_normal,86fb70f6-d13a-4fdf-80eb-a744902cbec2_gdc_realn.bam,47d0c160-601e-445d-b19e-77a5d20d623f,Tumor,a9ccc7bf-8b4f-43bb-90ce-d943ea5bac6e_gdc_realn.bam,b6c8e44f-c2d4-4353-b4a2-8859e573da0b,Blood_normal,e8353960-b692-41e4-b7c3-007b1d54f4a2,Tumor,58d48892-e06c-4389-8a66-efe483c39f35,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_CGATGT_S6_L008_R1_001.fastq.gz,9e12b685-c46a-431b-bb9e-a90c6f7892ee,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_CGATGT_S6_L008_R2_001.fastq.gz,787253e0-0877-4bfa-98f5-9a258f8e1b57,NA,NA,YES,PASS
+S015,C3L-00356,Yes,3,11,129C,C3L-00356-01,CPT0002130003,Tumor,No,United States,NA,under 50 %,Carcinosarcoma,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,1,25.37,73,No,White,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior endometrium,Unifocal,3.9,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,2,0.005414938,0.025758874,0.070554976,0.027960938,0.044537714,0.001488506,5.19E-10,0.824284054,0.024455326,0,0.06845052,0.02094494,0,0.027481813,0,0.0549426,0,0,0,0.054428947,0,0.128290373,0.022855717,0.094933198,0,0.036388646,0.007330618,0,0.008005031,0.001492271,0.55,1945.541934,3256.99141,5202.533343,NA,-2.41,-1.9,0.38,-0.1,0.53,0.85,-0.74,-0.21,1.07,-0.95,0.37,-0.56,-0.35,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.403159273,0.272816019,CNV_HIGH,NA,-0.076781268,-0.065265686,-0.290586275,0.003074446,0.683586023,0.004786549,0.133055973,0.500491115,0.366452912,MSS,No,WT,WT,1.2211157,1.182279033,0.086674411,5.459431619,5.392317423,1.584962501,CNV_high,24.3902439,7.317073171,46.34146341,4.87804878,4.87804878,12.19512195,Blood_normal,0d1a19ed-b1f4-4d0f-9eff-5922afcb6cbe_gdc_realn.bam,b94ff0a4-127b-4fac-9a68-2ec50ce6c202,Tumor,e6e1c2a6-27c3-4d55-87ed-6cd0ab0fd6b6_gdc_realn.bam,cece60fc-811e-4d03-9f7e-731f0b80212f,Blood_normal,8b2d66ef-8fc7-47b4-9717-2dc51259c0a6,Tumor,b841bd14-fc26-40a4-b89b-f7fc50cdc513,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_CGATGT_S11_L004_R1_001.fastq.gz,4b2a33c2-3d9a-405c-9d10-049bed615f1d,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_CGATGT_S11_L004_R2_001.fastq.gz,2e91202d-2403-4619-8111-5f26fdf879b1,NA,NA,YES,PASS
+S016,C3L-00358,No,1,3,130N,C3L-00358-01,CPT0002210011,Tumor,No,United States,NA,50 % or more,Serous,YES,Normal,pT1b (FIGO IB),pNX,cM0,Staging Incomplete,Stage I,IB,1,26.22,90,No,White,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior endometrium,Unifocal,4.5,Positive,75,Positive,5,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Overexpression,"CD10 is negative. CD56 and Synaptophysin stains highlight the neuroendocrine component, while chromogranin stain is largely negative.",Cannot be determined,Unknown,0.005182084,0.022440994,0.105618133,0.018651113,0.04257597,0.00569926,1.49E-10,0.799832446,2.00E-04,0,0.087616998,0.048920734,0,0.136319654,0,0.016860233,0,0,0,0.081825121,0,0.102629415,0.092353708,0.213796665,0,0,0,0.07237302,3.00E-04,0.006842302,0.86,2379.6735,6074.369909,8454.043409,0.900919036,-0.67,-2,-1.32,-1.4,0.83,-0.07,0.79,0.25,-0.98,0.69,-0.07,-0.33,-2.19,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.372287041,0.269313888,CNV_HIGH,6.264013781,-9.85E-05,-0.021353659,-0.278804664,0.26160588,0.326433728,0.007941129,0.223339079,0.53935279,0.237308131,MSS,No,WT,WT,2.066555769,1.93256673,0.455772534,6.700439718,6.523561956,3.700439718,CNV_high,10.98901099,36.26373626,36.26373626,7.692307692,2.197802198,6.593406593,Blood_normal,8927cd25-7348-4319-8a9b-248fc0bc8e56_gdc_realn.bam,70b053c6-05af-43b7-8393-bef10a4390a9,Tumor,ea49ecce-a8c3-410f-987b-07364fee8b18_gdc_realn.bam,15b5f6c6-6f94-43c2-951c-72df48f24ee2,Blood_normal,bb62f2d2-b146-490a-bc13-b209c265d28d,Tumor,573df828-ddd8-43e5-9dc2-513911ee977f,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTTTCG_S2_L001_R1_001.fastq.gz,9cb3b6ce-7b2a-4225-b0e7-d32d47171740,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GTTTCG_S2_L001_R2_001.fastq.gz,e9a087df-10b2-498e-90d3-cd60747088b8,NA,NA,YES,PASS
+S017,C3L-00361,No,1,2,127C,C3L-00361-01,CPT0002430011,Tumor,No,United States,FIGO grade 1,Not identified,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,42.98,64,Yes,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Unifocal,2.7,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Cannot be determined,None,0.005114234,0.025807817,0.092134705,0.037330484,0.062030244,0.000940909,6.77E-10,0.776641607,0.005639499,0,0.022842137,0.029908963,0,0.095277213,0,0.006606719,0.015668239,0,1.00E-04,0.002681846,0.004718605,0.007085054,0.01282946,0.058647987,1.00E-04,0.009461798,0.027745052,0,7.00E-04,0,0.3,613.0876971,2936.879453,3549.96715,0.955786378,0.443,0.75,-1.16,-1.66,0.1,-0.19,-1.97,0.39,-2.33,-1.78,-2.3,1.56,-1.69,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1.11E-05,0.041329089,CNV_LOW,1.300911329,-0.000490928,-0.038363057,-0.043604925,0.810292168,0,0,0.046460963,0.848208592,0.105330444,MSI-H,No,WT,WT,4.415525945,4.056121768,2.512450001,9.361943774,8.982993575,7.257387843,MSI-H,19.00990099,2.772277228,48.31683168,18.01980198,8.316831683,3.564356436,Blood_normal,7df399c7-9281-4eb1-8e1b-9d88c80e5881_gdc_realn.bam,9c34aa06-3655-49af-86ea-83132b1eb953,Tumor,8c244af7-4d0a-447d-8da2-af9d147718aa_gdc_realn.bam,15474b0b-f26e-4516-80ec-aca7986d8282,Blood_normal,294e6bb9-3965-42e0-8a2b-d954c16990f0,Tumor,47f3fed4-4116-4843-9843-0ab433dad9a4,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ACTGAT_S12_L004_R1_001.fastq.gz,d1c64f6f-aa85-463a-8773-69513cad82a7,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ACTGAT_S12_L004_R2_001.fastq.gz,c0cbd504-5d93-4f9d-bb5a-1cbe602f2a3d,Tumor,a30ec717-112c-4f16-86c9-e31d765d7033,YES,PASS
+S018,C3L-00362,No,2,5,130N,C3L-00362-02,CPT0002500011,Tumor,No,United States,FIGO grade 1,Not identified,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,55.86,38,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,13.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Normal,P16 and BCL2 positive,Absent,None,0.00332298,0.200220933,0.095114204,0.040712363,0.07238533,0.00133655,2.86E-09,0.586907637,0.028168355,0,0.008908075,0.05901444,0,0.158299126,0,0.027016898,0.028332406,0,0,0.018869097,0.046614126,0.052890205,0,0.06641301,0.030543531,0.032192268,0.034316037,0,0,0.018422424,0.61,2953.951139,3924.864459,6878.815598,0.466250633,-0.153,0.604,0.68,-1.31,-0.53,1.52,-0.82,0.64,0.67,-0.6,-1.08,-0.65,2.14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.348704409,0.128758751,CNV_HIGH,4.16601537,-0.000537474,-0.210417756,-0.165150754,0.224806047,0.010537265,0.015115205,0.130018403,0.514841368,0.355140229,MSS,No,WT,WT,0.801600757,0.775745316,0.043988012,4.64385619,4.584962501,1,CNV_high,30.43478261,13.04347826,39.13043478,4.347826087,8.695652174,4.347826087,Blood_normal,fabf7d4a-34b3-4c27-8921-035cace2795b_gdc_realn.bam,20d5f472-7ada-4026-ad90-c4aee6490d47,Tumor,cc81280d-f906-4e45-a988-000aeb16fcdf_gdc_realn.bam,e297322a-2b1a-4610-a5d7-bb31fffd07bc,Blood_normal,40d09165-a271-453d-9bd0-1f6056b7b83d,Tumor,3af7e6a3-594d-4830-a5ea-cef88e23f906,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ATTCCT_S6_L004_R1_001.fastq.gz,638dd879-a5ee-437c-9f30-c97dcfa19a09,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ATTCCT_S6_L004_R2_001.fastq.gz,a02f460b-1cbd-4c92-a5ef-9928088c8c1e,Tumor,817eff3d-6e39-45e1-bcee-fca423af1aef,YES,PASS
+S019,C3L-00413,No,4,13,130N,C3L-00413-01,CPT0009980003,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT2 (FIGO II),pN0,cM0,Staging Incomplete,Stage II,II,0,42.19,60,No,White,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior,Unifocal,3.2,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.002976646,0.032530637,0.072912978,0.076101921,0.074618826,0.00116644,1.17E-09,0.739692551,0.012463677,0,0.001624051,0,0,0.104674412,0,0.060904492,0,0,0.029287744,0,0.033466742,0.048188695,0,0.022231967,0,0.042889981,0,0.018753335,0.018264584,0.01725032,0.41,1184.03053,3696.358111,4880.388642,0.501202113,-1.63,-2.57,1.78,1.11,-1.4,1.54,-0.23,1.19,0.57,0,-1.02,2.24,1.64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.001083995,0.167850707,CNV_LOW,2.92410267,-0.091810653,-0.097520363,-0.103711441,3.80E-05,0.000125954,7.34E-05,0.158828136,0.632079059,0.209092805,MSS,No,WT,WT,1.331721187,1.258934249,0.168435383,5.64385619,5.523561956,2.321928095,CNV_low,15.55555556,8.888888889,57.77777778,8.888888889,4.444444444,4.444444444,Blood_normal,92bebf14-5fd0-453e-99a3-c7041b9cde22_gdc_realn.bam,6058412c-d366-44dd-a0fc-4aad9f6166dc,Tumor,ef798c59-f595-45b8-960e-6b36c8169fb9_gdc_realn.bam,f9bf7283-eb23-477d-bc36-b928c96ff1f2,Blood_normal,22c5648f-e238-4e98-b2d5-32d41ab94314,Tumor,7cd7dd5b-4c04-4f55-88c5-73a813a1b7a8,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_GTGAAA_S1_L006_R1_001.fastq.gz,e46aae2a-0162-443c-9f2d-7f95cd2f132e,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_GTGAAA_S1_L006_R2_001.fastq.gz,05b8da1e-d495-4023-9aca-b71417350ee1,Tumor,77466cca-387b-4e58-861c-bb2e74085989,YES,PASS
+S020,C3L-00449,No,2,5,131,C3L-00449-01,CPT0007100003,Tumor,No,United States,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,27.82,59,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior endometrium,Unifocal,7,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Cannot be determined,None,0.005195016,0.103207263,0.13687414,0.035558676,0.132518871,0.003291385,3.95E-09,0.583354645,0.011222617,0,0.051383493,0.061136375,0,0.284732939,0,0.052863572,0,0,0,0.009243439,0,0.027789143,0.052425878,0.154422067,0.011092339,0.028793179,0.047224779,0,0,0.137670181,0.93,5176.758433,6698.972556,11875.73099,0.334225635,-0.23,-0.407,1.57,0.42,1.93,0.75,0.77,0.27,1.1,1.31,1.03,0.09,1.65,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018199062,0.044998681,CNV_LOW,0.448567224,-0.010880178,-0.011242408,-0.013211994,8.84E-05,0,0,0.237359854,0.330364549,0.432275597,MSS,No,WT,WT,1.560942049,1.434447489,0.319245674,6,5.807354922,3.169925001,CNV_low,12.72727273,3.636363636,61.81818182,7.272727273,5.454545455,9.090909091,Blood_normal,5a75740b-ad5b-4f29-8b9e-1d011f50aac7_gdc_realn.bam,e8723283-d74f-456f-90e8-7a54b0d8abe2,Tumor,73d8e268-db73-4fa5-b943-4e99a762e81a_gdc_realn.bam,497a5093-f4f0-4fbd-b869-77bec7018f5b,Blood_normal,688ef886-6324-46d5-89ee-11be2c3c3b8b,Tumor,79b0a390-f6b2-4eb1-a905-90f69c0104e5,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CGATGT_S16_L004_R1_001.fastq.gz,f77c5bff-f9df-4a57-a2e9-391fcd5a6ba2,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CGATGT_S16_L004_R2_001.fastq.gz,1faa1af4-b72b-4267-a86a-893fa2f67d7f,Tumor,ce10e000-0fb6-4697-9ad3-16ef7f03d668,YES,PASS
+S021,C3L-00563,No,3,11,128N,C3L-00563-03,CPT0063250003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,34.72,62,Yes,Asian,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior,Unifocal,3,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,1,0.005543654,0.017993074,0.086076324,0.050296678,0.087109023,0.001089022,1.95E-10,0.751892225,0,0.014642819,0.028840788,0.087713967,0,0.107587815,0.009782414,0,0,0,0,0.016213555,0.018172299,0.043837507,0.012564144,8.00E-04,0.001049951,0.004345187,0.016645745,0,0.007794239,0,0.37,864.2241924,3523.379962,4387.604155,0.61702856,0.616,1.47,-0.57,-0.12,-1.02,-0.1,-1.4,-0.28,-0.41,-1.52,-1.39,0.35,0.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.065467984,CNV_LOW,1.827783368,-0.000633572,-0.052457471,-0.055272951,0.713144091,0,0,0.085761394,0.713908354,0.200330252,MSS,No,WT,WT,1.401011577,1.331721187,0.168435383,5.754887502,5.64385619,2.321928095,CNV_low,6.12244898,6.12244898,63.26530612,14.28571429,6.12244898,4.081632653,Blood_normal,f38738f1-ed43-4824-9c84-b23a66d122b2_gdc_realn.bam,f80735d2-6a7e-4d58-965d-5b3b48520bd0,Tumor,e1743cdd-69f2-4643-810d-4f1c677560ac_gdc_realn.bam,1b3536a2-9329-4dd7-a180-fdeb3e6c96e8,Blood_normal,611aab51-5e8b-42a5-a07c-0f263c5b99cb,Tumor,8135d8d2-1d84-43d2-ac41-4eec5cebb607,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_TGACCA_S7_L002_R1_001.fastq.gz,8b076b0a-e1a0-4215-ab01-2aa7e0a0d947,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_TGACCA_S7_L002_R2_001.fastq.gz,9c892a88-3d18-4757-8c0c-ae75966102c7,Tumor,45eec88a-c933-459c-8dce-9fbbfedda949,YES,PASS
+S022,C3L-00586,No,2,6,129C,C3L-00586-03,CPT0023630003,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT2 (FIGO II),pN2 (FIGO IIIC2),cM0,Staging Incomplete,Stage III,IIIC2,1,21.45,50,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and posterior endometrium,Unifocal,6,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,2,0.006596366,0.064384568,0.126116005,0.0421249,0.104962403,0.005944262,5.66E-12,0.649871496,0.023552518,0,0.047689147,0.018563104,0,0.176219029,0.007532985,0.083529995,0,0.052661005,0,0.013514406,0,0.036327449,0.05725825,0.110485885,0.004780403,0,0,0,0.001638531,0.006247294,0.64,3748.640733,6767.884691,10516.52542,0.624274153,-0.715,-0.336,1.82,-0.19,1.49,1.31,0.56,0.57,-0.8,0.29,0.34,0.33,0.43,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,0,1,0,0,1,0,1,0,1,1,0.020352993,0,CNV_LOW,0.117984612,-2.55E-05,-0.000868323,-0.005118486,0.01153052,0,0,0.205001063,0.582138542,0.212860395,MSS,Yes,WT,WT,8.737485435,8.7318234,1.417826394,13.74766852,13.74199358,5.781359714,POLE,42.00729927,0.270072993,38.64963504,8.686131387,1.102189781,9.284671533,Blood_normal,dde47804-86f0-4b49-b448-483e4353bc22_gdc_realn.bam,38df11e7-4962-4828-a4bb-556f360d21a3,Tumor,c14109ad-a867-4f58-9e70-a7575597416a_gdc_realn.bam,8cf50c93-9dec-4515-82a7-1959f8e7f312,Blood_normal,cf887209-91e9-4a98-af3f-1559013596d4,Tumor,e9d15bfb-5cca-4f62-9cd6-247f8fe3e58e,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_ACAGTG_S23_L008_R1_001.fastq.gz,20fcc809-dcdf-46c5-b40d-c1aa39c6791c,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_ACAGTG_S23_L008_R2_001.fastq.gz,e731e945-44ae-4179-9672-a2a6a1fb2d94,Tumor,c250313a-9102-410f-9083-713007b3910f,YES,PASS
+S023,C3L-00601,No,2,6,127C,C3L-00601-02,CPT0007600003,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,25.03,57,No,White,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior,Unifocal,3.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.005166918,0.022602285,0.116480361,0.038409552,0.063408919,0.002075369,1.70E-10,0.751856594,0.031297567,0,0.053445267,0.072290433,0,0.079296814,0.035281171,0.083814428,0,0.009010017,0,0.02793142,0,0.026942536,0.029796878,0.075305468,0.004461257,0,0,0.001126745,0,0,0.53,1706.11003,4850.828051,6556.938081,0.724145823,0.372,-1.03,0.31,1.38,-1.63,0.82,-0.49,0.81,-0.17,-1.01,-0.78,-0.07,-0.44,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,1,1,0,1,0.018033471,0.052014371,CNV_LOW,0.36400443,-0.028557583,-0.003489469,-0.010103645,0,0.000349225,0.000650063,0.141321035,0.651375137,0.207303827,MSS,Yes,WT,Nonsense_Mutation,7.006927428,6.998568599,0.801600757,12.00947875,12.00105627,4.64385619,POLE,44.28989751,0.268423621,39.97071742,7.637872133,1.171303075,6.661786237,Blood_normal,50c4a6f8-9efd-4898-a962-a7d89078b9b5_gdc_realn.bam,b7d63e3b-56bd-446b-a6ef-c6197ef84a94,Tumor,84ccff59-af29-4cbf-8068-185a2e2b4c1f_gdc_realn.bam,52aa202f-2d6e-4b6d-bbae-49bdda1d0f3c,Blood_normal,db638288-3dbf-436b-8444-56185109f1eb,Tumor,50107236-d227-4b6c-bd75-b9e860965ea2,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ACAGTG_S3_L001_R1_001.fastq.gz,01b9fd20-8317-4067-84cf-e73dd8f92572,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ACAGTG_S3_L001_R2_001.fastq.gz,5efbd40c-4225-4a7f-9920-71357ccf41f0,Tumor,bc1f150a-bba5-4b3e-a911-3c0fb2bd2c3f,YES,PASS
+S024,C3L-00605,No,3,12,127N,C3L-00605-01,CPT0063890003,Tumor,No,United States,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,38.54,73,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior,Unifocal,2.7,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,Unknown,0.004118258,0.064685078,0.091412586,0.035879255,0.073710586,0.004558948,3.50E-09,0.725635286,0.009964698,0,0.055385291,0.067875939,0,0.049639616,0,0.069693176,0,0,0,0.054924606,0.057276039,0.006640164,0.013094056,0.147629049,0.010183766,0.036799065,0.018269735,0,0,0.0226248,0.62,3006.948997,4977.8832,7984.832197,0.519451692,-0.103,0.108,0.6,0.47,-0.51,0.54,-0.29,0.33,0.2,0.24,-0.94,0.41,1.95,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,2.22E-05,0.045314014,CNV_LOW,0.743913927,-0.017919344,-0.019029565,-0.021835922,0,0,0.000973227,0.086578499,0.607380774,0.306040727,MSS,No,WT,WT,1.691133088,1.560942049,0.354607617,6.189824559,6,3.321928095,CNV_low,14.28571429,9.523809524,61.9047619,9.523809524,0,4.761904762,Blood_normal,9c7363c2-750a-408e-aa11-1bfaae41e7d0_gdc_realn.bam,54a0c2d4-cbb6-41f9-bfaa-e8c58c7f5362,Tumor,d9a888d9-211a-4a13-bbed-23dba208a9d2_gdc_realn.bam,d3e79d6a-2baf-4059-9ecb-ec1073619f26,Blood_normal,8f0c137b-dc5d-43c6-81e8-d728d8a1c5a8,Tumor,e8fc5d11-248f-4095-b9bd-e4f2c732e6fb,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_AGTTCC_S22_L006_R1_001.fastq.gz,2ef7f6a7-6298-4cd2-a38b-9b810905ab57,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_AGTTCC_S22_L006_R2_001.fastq.gz,7056adfe-ff91-4228-91b4-9954ba0148dd,Tumor,e06eb928-39e8-491a-bf5f-395f69acf724,YES,PASS
+S025,C3L-00767,No,3,12,131,C3L-00767-01,CPT0026430003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,65.71,56,No,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Multifocal,2.3,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Normal,NA,Absent,4 or more,0.005662834,0.078699134,0.10828337,0.046021406,0.077628328,0.002766177,3.70E-09,0.680938746,0.001468636,0,0.003981782,0.019412424,0,0.082865548,0,0,0.006352148,0,0,0.042554994,0.009198579,0.017816682,0.003920914,0.110666462,0.002742905,0.009009601,0.092315548,0,0,0.007693776,0.41,2565.123134,3893.125442,6458.248576,0.445231747,-0.066,0.667,0.08,0.17,-1.75,-0.02,-1.2,-0.69,-0.46,-0.99,-0.88,0.33,-0.57,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.017682543,0.045375442,CNV_LOW,0.968385832,-0.02658492,-0.021106253,-0.019964098,0.002878658,0,0,0.030069605,0.669628949,0.300301446,MSS,No,WT,WT,1.499080695,1.401011577,0.245810227,5.906890596,5.754887502,2.807354922,CNV_low,20.75471698,7.547169811,52.83018868,11.32075472,5.660377358,1.886792453,Blood_normal,7993ec03-5c3f-4f2a-920d-33dbf262d40a_gdc_realn.bam,07e62483-1397-4efc-be62-9009f7568709,Tumor,bf2c19ca-4c02-444c-8137-65a17c479448_gdc_realn.bam,30f59c0f-6d2c-4513-beb7-0d9792fa3edb,Blood_normal,d859af69-ac8f-4a4a-a376-9a6af50c07f3,Tumor,87dc9106-3c23-4cd3-8a85-6992800a5673,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_CAGATC_S15_L004_R1_001.fastq.gz,398b396a-17ba-4210-a645-a0f049b6e01e,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_CAGATC_S15_L004_R2_001.fastq.gz,b906f8b8-457b-4eed-bcc9-e9fb458b8bfa,Tumor,88c71434-68d4-4496-b299-65ffe96d3c3f,YES,PASS
+S026,C3L-00769,No,2,5,129N,C3L-00769-01,CPT0026530003,Tumor,No,Other_specify,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,cM0,Staging Incomplete,Stage I,IB,0,46.64,56,No,White,Not reported,Female,Anterior endometrium,NA,Multifocal,4.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,2,0.005557191,0.115973086,0.094754968,0.06307061,0.100176584,0.004079595,8.81E-10,0.616387965,0.017468027,0,0.011671998,0.060441238,0,0.176943387,0.007105428,0.012846689,0,0,0.020821045,0.051867091,0.015500241,0.015333559,0.023880414,0.139210676,0.002732616,0.001379743,0.072599294,0,0,0.010198554,0.64,4634.296239,4977.305287,9611.601525,0.271365818,0.241,0.951,-0.06,-0.66,-0.14,-0.19,-0.4,-1.69,0.94,-0.46,0.01,-0.38,1.12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.019047801,CNV_LOW,0.037652558,0,-0.001245513,-0.004003871,0.001724522,0,0,0.055775942,0.28695343,0.657270628,MSS,No,WT,WT,1.201828046,1.080351414,0.245810227,5.426264755,5.209453366,2.807354922,CNV_low,13.88888889,13.88888889,58.33333333,8.333333333,5.555555556,0,Blood_normal,4f90dd58-4dc8-4338-95ae-ce993e9370e4_gdc_realn.bam,72f9d84b-db67-4e8a-89ce-b5e9ce97c214,Tumor,a0c6d074-5bd3-4293-a7f7-fc4366b926a4_gdc_realn.bam,fe1bab24-7e25-4cf8-ac7e-09ee67eedaa6,Blood_normal,3a884473-fe8a-45e8-ae2d-eaaeaae500da,Tumor,d8f8126d-326b-43ee-b830-62377996a4d3,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_ACTTGA_S14_L004_R1_001.fastq.gz,b279cd43-c02c-42f0-8c35-aaf3a15d1d3d,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_ACTTGA_S14_L004_R2_001.fastq.gz,89545c45-58a9-44f7-a03e-f0c0cf2a901f,Tumor,d51c9ba6-3ae1-4aff-a626-f625b2468af0,YES,PASS
+S027,C3L-00770,No,3,9,127N,C3L-00770-01,CPT0026650003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,45.83,73,Yes,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Multifocal,2.7,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,4 or more,0.005909312,0.036781095,0.10856077,0.052156669,0.067570639,0.003399076,1.44E-08,0.725622424,8.00E-04,0.002332159,0.006063698,0.057224851,0,0.124472718,0,0,0.04120487,0,0.019629883,0,0,0.062493983,0.011079146,0.053857024,0.018233193,0.008867171,0,0.035334153,0,0.048423841,0.49,2347.226471,5163.497494,7510.723965,0.684164791,0.135,0.243,-0.83,1.84,0.3,-0.87,0.31,0.11,-1.18,0.96,-0.42,1.33,-0.85,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0.002211541,2.00E-07,CNV_LOW,0.10422179,0,0,0,0.000816684,0.00090885,0.000754789,0.202506396,0.610413148,0.187080456,MSS,Yes,WT,Missense_Mutation,5.025868086,5.016239079,0.282995148,9.995767151,9.985841937,3,POLE,33.36623889,0.493583416,42.34945706,6.614017769,0.592300099,16.58440276,Blood_normal,330d5168-11d4-4bd3-924c-00a48a2e77f6_gdc_realn.bam,44017bf5-1244-4370-a7df-d8d40c18841c,Tumor,9c061602-847f-499d-bc0d-1761c1964373_gdc_realn.bam,8db62179-1b8d-4ed0-b1eb-e7145b9db850,Blood_normal,ee622934-1e17-44f8-8d8d-41011cc7e50e,Tumor,f9d8547d-be92-461a-b9ce-30c2deb01239,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_GATCAG_S11_L003_R1_001.fastq.gz,866a0af4-d611-4690-89f7-a6bd28807494,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_GATCAG_S11_L003_R2_001.fastq.gz,f5bbe1ad-856b-4c71-bd70-e538c2b1898e,Tumor,a2a7a853-0dc7-4476-b3cf-498a1a0e7a3b,YES,PASS
+S028,C3L-00771,No,3,11,127N,C3L-00771-01,CPT0026770003,Tumor,No,Other_specify,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT3a (FIGO IIIA),pN0,cM0,Staging Incomplete,Stage III,IIIA,1,22.86,86,No,White,Not-Hispanic or Latino,Female,Posterior endometrium,NA,Multifocal,6,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,Unknown,0.005200559,0.065950716,0.10865991,0.049383477,0.267129768,0.003655548,1.99E-09,0.500020021,0.022073223,0,0.031251803,0.051264159,0,0.139556902,0,0.015331903,0,0,0,0.021782958,0.027223716,0.04413059,0.006318375,0.105220232,0,0.023599304,0.130442357,0.019248281,0,0.002556199,0.64,6048.630658,5780.984035,11829.61469,0.131521622,0.205,-0.912,0.71,-0.21,-0.02,0.11,0.17,-1.3,2.19,0.29,0.29,-0.72,0.41,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.044799188,CNV_LOW,0.629254484,-0.017869153,-0.015028621,-0.012660904,0.000277034,3.93E-05,0,0.069933635,0.298756785,0.63130958,MSS,No,WT,WT,1.860596943,1.771826721,0.282995148,6.426264755,6.303780748,3,CNV_low,10.25641026,3.846153846,73.07692308,5.128205128,6.41025641,1.282051282,Blood_normal,2bbab210-8789-4844-9de7-509231db0e6c_gdc_realn.bam,e757f535-dfb5-452d-9796-87c50fa8a73b,Tumor,506829d5-3821-445a-b4c7-083fb483e8dc_gdc_realn.bam,319b9314-8f00-49f7-899e-727a6b5fafd4,Blood_normal,4b0f79f5-6bce-4af8-a6eb-a52a896cfe48,Tumor,0459b7a4-b983-423b-bdf6-f7d93f8f5cba,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_TAGCTT_S10_L003_R1_001.fastq.gz,a6fab0ab-ce4b-44cb-bf38-b9db4caa2e70,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_TAGCTT_S10_L003_R2_001.fastq.gz,8a6ab525-f78c-4814-89f3-4b487a4a847c,Tumor,6f3c3bb1-4d83-4cb1-bf1e-27d3489a55e6,YES,PASS
+S029,C3L-00780,No,2,7,129N,"C3L-00780-03,C3L-00780-05",CPT0070330004,Tumor,No,Other_specify,FIGO grade 2,Not identified,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,46.41,69,Yes,White,Hispanic or Latino,Female,"Other, specify",Posterior and Anterior Endometrium,Unifocal,1.7,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,2,0.004075369,0.217301757,0.12764772,0.010845963,0.147513616,0.004932524,5.10E-07,0.487682541,0,0,0.021648923,0.022225746,0,0.221950913,0,0.002192122,0,0,0,0.031367877,0,0.064232161,0.012991405,0.253723841,0.012153007,0.00170767,0.02840326,0,0,0.157403077,0.83,3824.192222,6771.601901,10595.79412,0.363324137,-0.818,-0.696,1.18,0.51,-0.09,1.19,0.81,-0.73,0.25,1.23,0.39,-0.31,1.61,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1.55E-05,0.167869834,CNV_LOW,1.257412782,-0.041422012,-0.044486227,-0.047764927,0,0,0.000446531,0.346200922,0.449105173,0.204693905,MSS,No,WT,WT,1.59090564,1.417826394,0.42283286,6.044394119,5.781359714,3.584962501,CNV_low,12.96296296,7.407407407,61.11111111,7.407407407,3.703703704,7.407407407,Blood_normal,5b8ab142-f4da-4643-8c68-db13cfd072b2_gdc_realn.bam,c715d91e-8c73-4dfb-8bc8-7b40b85a66c3,Tumor,112790c0-190c-469d-9a8f-27abdcd1bea6_gdc_realn.bam,6ac5dcc3-1579-4f8f-9974-8183e970c433,Blood_normal,30d98464-2799-4a96-ac54-29b14e1512cc,Tumor,8396af38-2c13-409d-a101-ea1a579fc1e7,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_CTTGTA_S4_L006_R1_001.fastq.gz,433f4fc5-843a-4a1b-b993-dd413244a03f,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_CTTGTA_S4_L006_R2_001.fastq.gz,0ad8238f-6db6-42c0-ae96-4d7a5f0e7e7b,Tumor,92d49cdb-841a-47dd-8ff1-41957f9c5ddd,YES,PASS
+S030,C3L-00781,No,4,16,127N,C3L-00781-04,CPT0070260003,Tumor,No,Other_specify,FIGO grade 3,50 % or more,Endometrioid,YES,Normal,pT2 (FIGO II),pN0,cM0,No pathologic evidence of distant metastasis,Stage II,II,1,71.09,48,No,White,Hispanic or Latino,Female,"Other, specify",Anterior and Posterior Endometrium,Unifocal,5.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Loss of nuclear expression,Loss of nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,4 or more,0.005420698,0.189251581,0.194236095,0.019354278,0.076433938,0.003331587,1.25E-09,0.511971821,0.030621947,0,0.057724868,0.128965836,0,0.359625849,0.017896475,0.027238232,0,0.003992697,0,0.002789135,0.027796018,0.026623532,0.047401521,0.10620711,0.003669428,0.041396454,0,0.031426484,0.008152597,0.028471817,0.95,4745.871875,6643.16418,11389.03605,0.521439556,-0.437,-0.387,0.47,1.46,-0.81,-0.26,0.3,1.46,0.46,-0.38,-0.11,0.05,-0.42,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0.052006459,0.000146769,CNV_LOW,0.400215642,-1.54E-05,-0.000658373,-0.000764994,0.016533914,0.022152841,0.021908213,0.26933117,0.482803588,0.247865242,MSI-H,No,WT,Frame_Shift_Del_Nonsense_Mutation,5.315549182,4.80079975,3.694931928,10.29347165,9.763212367,8.596189756,MSI-H,12.21198157,2.304147465,60.59907834,19.93087558,3.456221198,1.497695853,Blood_normal,f66e9c98-6bf9-401d-908e-6647373ab6ec_gdc_realn.bam,337eceaf-f9ad-4aca-8ba6-501faf696188,Tumor,10716ab4-7a85-4ad5-a1e0-ff7e1339aa0a_gdc_realn.bam,44dc38c6-1db6-4fe2-9a78-154b71409bbd,Blood_normal,a0b19c7e-733f-460c-969d-2e6954549afb,Tumor,c118fa04-a798-4523-b02f-4f292c61f47c,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_CGTACG_S9_L003_R1_001.fastq.gz,0de24a9c-ebaa-45df-8405-9c753dce28b2,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_CGTACG_S9_L003_R2_001.fastq.gz,8fe73704-391c-40df-b4ed-7ddab091670b,NA,NA,YES,PASS
+S031,C3L-00905,No,3,10,130C,C3L-00905-02,CPT0063380003,Tumor,No,United States,FIGO grade 3,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN1 (FIGO IIIC1),Staging Incomplete,No pathologic evidence of distant metastasis,Stage III,IIIC1,1,44.81,64,No,White,Not-Hispanic or Latino,Female,"Other, specify",anterior and posterior,Unifocal,4.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,Unknown,0.004029459,0.052128545,0.12915044,0.04469791,0.0968925,0.00140797,9.64E-09,0.671693166,0.007473998,0,7.00E-04,0.011836886,0,0.053273559,0,0.006965088,0.009822934,0,0,0.029367134,0,0.160223537,2.00E-04,0.010375886,0,0.048482686,0.011250673,0,0,0,0.35,2073.093302,4354.875244,6427.968547,0.697873521,-0.134,-0.145,0.97,0.55,1.57,0.36,-0.47,1.2,-0.13,-0.05,-0.41,1.49,-0.99,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.491688464,0.138666321,CNV_HIGH,5.92206939,-0.007832131,-0.000201005,-0.215851256,0.131320241,0.112848771,0,0.133461604,0.647566516,0.21897188,MSS,No,WT,WT,1.434447489,1.349358266,0.207641466,5.807354922,5.672425342,2.584962501,CNV_high,20,12,42,14,8,4,Blood_normal,609e24b7-042b-4088-a100-b55d5f0beaf5_gdc_realn.bam,faace15d-2171-48b5-9abb-8eb89ca66633,Tumor,d678d221-9dbd-4e9f-8f94-9aaef5f7092f_gdc_realn.bam,e6585c40-504d-419a-b31d-bacbfadc10da,Blood_normal,8db9ff3a-558c-4fdb-88e7-310c92350b3b,Tumor,239efdc5-6656-41bd-acd4-c6ba3c99c4d0,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_GCCAAT_S2_L001_R1_001.fastq.gz,f8d21c9d-a65c-4aba-9cae-71eadbe611ff,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_GCCAAT_S2_L001_R2_001.fastq.gz,b84dc177-6c03-40d2-ae16-83bb9e772349,Tumor,62486e0a-5e3b-4d0d-9e5f-2ffa6553122e,YES,PASS
+S032,C3L-00918,No,2,7,131,C3L-00918-01,CPT0063790003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,1,43,68,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Both anterior and posterior endometrium,Unifocal,3,Cannot be determined,NA,Cannot be determined,NA,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Present,2,0.004322812,0.101417864,0.102821213,0.033989272,0.103538262,0.004822916,2.43E-08,0.649087637,0.026238122,0,0.013975788,0.006103286,0,0.247713667,0,0.096956018,0,0,0,0.074014313,0.013321546,0.037066542,0.063111119,0.113943164,0.030006631,0.024126845,0,0.046995801,0.040648716,0.035778439,0.87,4019.134039,6164.963056,10184.09709,0.491566743,-0.434,-0.122,1.59,0.67,1.11,0.36,1.14,0.06,0.12,1.21,0.41,0.19,1.32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0.018025661,3.33E-08,CNV_LOW,0.380049798,-0.00506506,-0.008649932,-0.011127615,0.001422013,0.002063857,0.004073356,0.172595657,0.476333232,0.351071112,MSI-H,No,WT,WT,3.667073084,3.1479216,2.274711708,8.566054038,7.994353437,6.965784285,MSI-H,15.7480315,2.755905512,56.2992126,17.71653543,3.149606299,4.330708661,Blood_normal,58bc0d8e-4cae-4a40-bc4a-2025637ff81d_gdc_realn.bam,b667c012-ee1a-4ddc-aee2-81635b8e37f4,Tumor,2d192478-8285-48eb-855b-40b8ce2a42a0_gdc_realn.bam,dbc8d39f-66cc-4ec1-8988-d6997c19f80d,Blood_normal,c04969da-c28a-4e6b-9b1a-cec0d56df395,Tumor,10d59b04-f3be-4df0-a5ad-217de279d020,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_CTTGTA_S7_L008_R1_001.fastq.gz,3d47af04-fe23-4f4d-8ec2-66420dd82fef,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_CTTGTA_S7_L008_R2_001.fastq.gz,83a66e82-d19f-4dcd-bf04-33397c2160a9,Tumor,48c97656-0509-49aa-aa9c-69f8407e0704,YES,PASS
+S033,C3L-00921,No,4,15,128C,"C3L-00921-02,C3L-00921-03",CPT0064150004,Tumor,No,United States,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT2 (FIGO II),pN0,cM0,No pathologic evidence of distant metastasis,Stage II,II,1,32.32,66,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior,Unifocal,4,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.006487549,0.065319095,0.13841229,0.039595057,0.052963288,0.002937646,1.74E-09,0.694285073,0.015676172,0,1.00E-04,0.064132247,0,0.414372638,3.00E-04,0,0,0,0.073868667,0,0.015968111,0.015992922,0.02439787,0.229949042,0.030731553,0.006704016,0,0.045033677,0.019763217,0.013000122,0.97,4267.943026,5550.608927,9818.551954,0.509315266,0.35,-0.0653,-1.3,-0.31,-1.23,-0.4,-0.45,1.32,0.33,0.07,0.28,-0.28,-0.04,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.075161896,0.167332085,CNV_LOW,2.04679354,-0.043062122,-0.045147436,-0.04007455,0.008340476,0.000435609,0,0.107124413,0.428986832,0.463888755,MSS,No,WT,WT,1.295786701,1.2211157,0.168435383,5.584962501,5.459431619,2.321928095,CNV_low,13.95348837,6.976744186,62.79069767,2.325581395,0,13.95348837,Blood_normal,5af0168b-bb1a-4771-a478-cb4f8ec8fde8_gdc_realn.bam,2bb81d8c-9d4e-441e-ad35-b5c5474a8559,Tumor,b9d8510c-3b0e-49fb-98a3-c7db2cd05da7_gdc_realn.bam,08f80502-5363-432c-93ae-25f87330cf9e,Blood_normal,4d726701-04ed-4a0a-bcab-495568a316e5,Tumor,fa13139a-3e2c-49cd-9318-1d7dc55f2f19,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_CTTGTA_S7_L003_R1_001.fastq.gz,c8ac958e-2e20-4379-9568-7cc508e311de,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_CTTGTA_S7_L003_R2_001.fastq.gz,e292b646-ce77-45a1-a535-cb0dd26898e8,Tumor,c87d6095-66a7-4f45-8149-609a893df913,YES,PASS
+S034,C3L-00932,No,2,8,128C,C3L-00932-01,CPT0027120003,Tumor,No,Other_specify,FIGO grade 2,Not identified,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,47.82,67,No,White,Not-Hispanic or Latino,Female,"Other, specify",left cornu,Unifocal,1,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,3,0.004444401,0.141805626,0.085956044,0.04509666,0.117913805,0.001916371,5.28E-09,0.602867088,0,0,0.003502029,0.082780907,0,0.095558177,0,0.029154427,4.00E-04,0,0,0.02872328,0.005956481,0,0.016122823,0.120844083,0,0.003705692,0.076648017,0,0.006576348,0,0.47,4455.576971,4402.548423,8858.125394,0.340425353,0.696,1.28,-1.33,-0.46,-0.7,-0.89,-1.29,-2.22,0.4,-1.34,-0.6,-0.78,-0.47,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.76E-05,0.035198403,CNV_LOW,0.215064437,-0.000648148,-0.002843947,-0.007270925,0.124562884,0,0,0.034227034,0.250955331,0.714817635,MSS,No,WT,WT,2.603454103,2.246751627,1.2211157,7.366322214,6.930737338,5.459431619,CNV_low,16.52892562,5.785123967,60.33057851,10.74380165,4.958677686,1.652892562,Blood_normal,2b46f8aa-89b0-472b-969d-2ddd7b6266a5_gdc_realn.bam,727b0893-cc84-4b7e-b139-ea00f0a60d42,Tumor,6fea7283-a05f-4452-bcfc-4489af212782_gdc_realn.bam,9dbeb6c5-ec18-4cda-b9a0-170ac1f547ac,Blood_normal,81b1a4f5-dcab-4985-bdda-f1dfbca21017,Tumor,c1e1965b-85d8-40d2-a084-afa2886d1da3,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_CTTGTA_S13_L004_R1_001.fastq.gz,dd5354a9-7c2a-48a3-8381-96ed8ee9ea08,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_CTTGTA_S13_L004_R2_001.fastq.gz,14fc8a67-8464-475b-9c69-0ab40757f0d7,Tumor,5fff0072-1273-4d96-b935-981bdae15727,YES,PASS
+S035,C3L-00938,Yes,4,15,129C,C3L-00938-01,CPT0016260003,Tumor,No,Other_specify,FIGO grade 2,50 % or more,Endometrioid,YES,Low,pT1b (FIGO IB),pNX,Staging Incomplete,Staging Incomplete,Stage I,IB,1,45.53,53,No,White,Not reported,Female,"Other, specify",both anterior and posterior,Unifocal,5.5,Cannot be determined,NA,Cannot be determined,NA,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Present,1,0.006872997,0.335080023,0.123591418,0.051605789,0.184967556,0.007488289,3.65E-09,0.290393925,0.00842545,0,0.017126022,0.067062409,0,0.116555551,0,0.012975359,0,0.020200039,0,0.027592607,0.030691074,0,0.075508375,0.140170374,0.042521713,0,0.081171027,0,0,0,0.64,8257.35682,8516.249705,16773.60652,0.163635965,-0.35,0.484,-0.81,-1.13,0.63,-1.17,0.13,-1.66,2.41,-0.18,2.12,-1.49,-1.4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.053955297,0.110651098,CNV_LOW,NA,0,-0.005297803,-0.006504151,0.017870334,0,0,0.213855098,0.081698938,0.704445963,MSS,No,WT,WT,1.240148892,1.037474705,0.389123498,5.491853096,5.129283017,3.459431619,CNV_low,5.882352941,14.70588235,38.23529412,35.29411765,2.941176471,2.941176471,Blood_normal,354aacf4-4f9f-4177-8882-d3f996822ad9_gdc_realn.bam,98c830a5-8735-4f5f-812d-3e7f7c3aa514,Tumor,56a3cbdb-5dad-4d22-af0f-f56075c412b9_gdc_realn.bam,94e9963b-ecb8-490c-854c-2043f19224f1,Blood_normal,382e74bf-0a8c-439e-95c7-88d8d22024fc,Tumor,e4e1ff6a-e89a-4351-969b-1d7c583f21f4,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_CCGTCC_S3_L001_R1_001.fastq.gz,893adafa-41f8-4469-9c19-4b3c91b82e20,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_CCGTCC_S3_L001_R2_001.fastq.gz,aafe7ff3-0502-4175-a4a9-887ae8c2ec5b,Tumor,ef097688-a892-422e-9738-d16960061229,YES,Failed
+S036,C3L-00942,No,1,3,130C,C3L-00942-01,CPT0016830003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,Staging Incomplete,Staging Incomplete,Stage I,IA,0,31.58,64,No,White,Not reported,Female,Anterior endometrium,NA,Unifocal,4.5,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,None,0.005177041,0.12990102,0.131887618,0.033872945,0.080990031,0.003407333,4.10E-08,0.61476397,4.00E-04,0.002365816,0,0.123466351,0,0.20566446,0,0.047263698,0.010235752,0,0,0.04054915,0.030148189,0.095737426,0.041096529,0.124492647,0.012869295,0.002002463,0.05073331,0,0.012958822,0,0.8,3909.219425,6057.850781,9967.070206,0.420444125,0.569,0.8,-1,-0.91,-0.35,-0.2,0.05,-1.52,0.29,-0.26,0.46,-0.98,0.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018025661,0.041968396,CNV_LOW,0.730471136,-0.000222039,0,-0.018111091,0.426543057,0.01139247,0,0.142267913,0.404198571,0.453533516,MSS,No,WT,WT,2.218238939,1.967248569,0.801600757,6.894817763,6.569855608,4.64385619,CNV_low,14.89361702,3.191489362,58.5106383,15.95744681,3.191489362,4.255319149,Blood_normal,3bf4671c-36c5-4987-a7f4-7afefe6422cf_gdc_realn.bam,06ebf4f1-4419-4dc0-a0c5-cf71457d6008,Tumor,c87a0a06-a7c7-46b7-acf9-42ee5469ff63_gdc_realn.bam,408ac551-237e-4ba3-8fe9-f9dc6c007ad6,Blood_normal,e1b5b9a4-609d-4bb6-a61e-c0c9e499f9ef,Tumor,b3f61b74-c4f8-4a05-883f-9e04a3b70cdb,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_TTAGGC_S14_L008_R1_001.fastq.gz,34b5b547-c104-4c3b-b809-8c5fc7bf841d,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_TTAGGC_S14_L008_R2_001.fastq.gz,54fe3f69-db4c-4bb1-bfe6-957aab9aaf4b,Tumor,342b1f84-56a5-42a4-a813-8f6ea9b7ae33,YES,PASS
+S037,C3L-00946,No,1,4,130N,C3L-00946-01,CPT0027520003,Tumor,No,Other_specify,FIGO grade 1,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,Staging Incomplete,Staging Incomplete,Stage I,IB,1,17.64,64,No,White,Not reported,Female,Posterior endometrium,NA,Multifocal,3.9,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,1,0.004359049,0.102365644,0.094506407,0.032133044,0.094765293,0.003602011,8.38E-09,0.668268543,0,0,0.00172655,0.049229317,0,0.167282206,0,0.045841298,0,0,0,0.105500717,0,0.051160224,0.040816315,0.174904075,0.00363144,0.014442484,0,0.056142168,0,0.239323207,0.95,4238.332575,5643.777667,9882.110241,0.549858229,-0.284,-0.184,1.47,0.78,1.33,1.35,1.13,1.12,0.37,1.3,-0.67,1.52,1.02,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1.40E-06,0.035134947,CNV_LOW,0.459632928,-0.001213574,-0.008244015,-0.017806263,0.284680929,0,0,0.154403781,0.510604207,0.334992012,MSI-H,No,WT,WT,4.821482763,4.333716157,3.187681472,9.784634846,9.276124405,8.038918989,MSI-H,20.03231018,2.423263328,55.25040388,14.70113086,4.36187399,3.231017771,Blood_normal,e1096e9c-9941-455a-861b-af3afbfaf88d_gdc_realn.bam,ae16fac3-238d-44ad-8b5f-c41ad8f2a58c,Tumor,49c45165-41d4-4682-b984-8dee018efa9a_gdc_realn.bam,9f7c16e3-2d73-42a0-9d4b-61270a1169d9,Blood_normal,8b361ca4-6848-4ba3-9c16-5c2cf1872ff4,Tumor,ba76dbf1-372a-4b71-aaac-d4c267ebbaa3,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ATCACG_S2_L001_R1_001.fastq.gz,dd5912cb-695c-487b-9267-291f8ebb54bb,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ATCACG_S2_L001_R2_001.fastq.gz,39a421b8-2826-44af-8ee6-af72e9a27cfa,Tumor,e3689d95-7cee-4906-9555-7bca7550557b,YES,PASS
+S038,C3L-00947,No,1,1,129N,C3L-00947-01,CPT0027420003,Tumor,No,Other_specify,FIGO grade 2,Not identified,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,36.84,71,Yes,White,Not reported,Female,Posterior endometrium,NA,Multifocal,1.8,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,2,0.004528554,0.039355715,0.085921464,0.044274755,0.13572922,0.004577919,1.35E-08,0.685612359,0.029244357,0,0.089815096,0.089063113,0,0.15312548,0,0.070066347,0,0.0128425,0,0.050743363,0.054421936,0,0.032144626,0.208800355,0.005772881,0.005438661,0.046598774,0,0.017523387,0.004399126,0.87,2629.892485,5752.438575,8382.33106,0.486689804,-0.304,0.0228,0.67,0.33,-0.44,1.01,-0.25,0.3,0.61,-0.44,-0.27,-0.96,0.84,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.001969706,3.50E-05,CNV_LOW,0.067541355,0,0,0,0.003398339,0.002129629,0.001831731,0.168378061,0.608515395,0.223106544,MSS,No,WT,WT,1.46712601,1.434447489,0.086674411,5.857980995,5.807354922,1.584962501,CNV_low,27.27272727,12.72727273,40,7.272727273,9.090909091,3.636363636,Blood_normal,151447ea-0a29-4f5b-b2b5-c844ecd8e530_gdc_realn.bam,0a956166-4f53-45a7-afcd-27ff6a956bb4,Tumor,8d88f32c-d396-4335-a5bf-6071ae72e740_gdc_realn.bam,d60d7a82-029f-47bc-82b9-0bdd09cbb8f3,Blood_normal,f09df81d-7521-4c7f-ade2-c7c64f7f6146,Tumor,766f7db5-1abd-45f6-b662-27fd77070be2,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ATTCCT_S5_L002_R1_001.fastq.gz,989082f5-abb2-4d47-a6b3-e3560b9b38d3,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ATTCCT_S5_L002_R2_001.fastq.gz,8d9b3ee8-b37d-4e67-856c-149dcd51f602,Tumor,e5c5a5b0-9951-49b1-ae6d-1da6f9ad75e7,YES,PASS
+S039,C3L-00949,No,1,2,130C,C3L-00949-01,CPT0016370003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,Staging Incomplete,Staging Incomplete,Stage I,IA,1,37.69,64,Yes,White,Not reported,Female,"Other, specify",along anterior and posterior surface,Unifocal,2.5,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,2,0.005098513,0.118553064,0.118607577,0.047321103,0.091623186,0.002330233,2.73E-09,0.616466321,0.014017651,0,0.006541175,0.065985323,0,0.172918892,0,0,0.00687843,0,0.016716326,0.00110555,0.010850501,0.00770542,0.004253386,0.130015737,0.01803949,0.006207267,0.028764852,0,0,0,0.49,4333.499845,4851.094335,9184.59418,0.342228576,0.667,0.864,-1.04,-0.47,-0.81,-1.05,-1.1,-0.25,0.71,-0.59,-0.31,-1.13,-0.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.04828426,CNV_LOW,0.557054701,-0.014771766,-0.007665712,-0.012735819,0,0,0,0.045648765,0.417360147,0.536991088,MSS,No,WT,WT,1.483191823,1.383998466,0.245810227,5.882643049,5.727920455,2.807354922,CNV_low,21.15384615,9.615384615,50,13.46153846,3.846153846,1.923076923,Blood_normal,ff9ecfd3-1cb1-4fbc-b644-7060d84c4df2_gdc_realn.bam,6dd56638-c056-487f-a7fe-f446da41b8e2,Tumor,b4f80061-4d26-41c1-9931-7423d6eb4b8a_gdc_realn.bam,182fea04-02cf-4d59-a7b5-ea460b167acc,Blood_normal,bb4ccd90-f05f-48bc-b1bf-31420ea52bff,Tumor,f027559a-6562-4a93-a278-256e1a8d6bf7,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CAGATC_S5_L008_R1_001.fastq.gz,acc81064-08f0-457b-9629-1bb742e4ceb6,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CAGATC_S5_L008_R2_001.fastq.gz,1e750be5-488d-4a05-9c08-928700638328,Tumor,c7086fa4-9b10-4c98-b740-4738c4ae16c2,YES,PASS
+S040,C3L-00961,No,2,8,131,C3L-00961-01,CPT0027320003,Tumor,No,Other_specify,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,28.5,59,No,White,Not reported,Female,Anterior endometrium,NA,Multifocal,4.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Absent,2,0.004460847,0.138551785,0.109482646,0.040021967,0.080876696,0.003183905,2.78E-09,0.623422151,0.010868604,0,0.007948338,0.109885559,0,0.36866113,0,0.073904574,0,0,0,0.03239514,0,0.083275431,0.064399209,0.179213063,0.014219009,0.060779007,0.034147031,0,0.02579255,0.074511355,1.14,5102.751057,5768.565065,10871.31612,0.397233013,0.203,0.661,-1.02,-0.52,0.98,-0.79,0.25,0.47,0.31,0.16,0.08,-0.72,0.09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018025661,0.064640447,CNV_LOW,0.321551621,-0.008686236,-0.004589812,-0.000740079,0,0,2.15E-05,0.113551357,0.22946024,0.656988404,MSS,No,WT,WT,1.417826394,1.313865823,0.245810227,5.781359714,5.614709844,2.807354922,CNV_low,10.41666667,2.083333333,75,10.41666667,2.083333333,0,Blood_normal,2ebebf6f-5324-4914-b811-16b21e534c86_gdc_realn.bam,4d3a50d1-30e2-41fa-bea1-18e48bb47d4f,Tumor,56a0a87f-0053-436e-be1c-a0a3c975e19e_gdc_realn.bam,266c147a-cbf1-4b6d-8bb7-2497f2ad9013,Blood_normal,7ede6f1d-f31a-4122-b9a4-efec55776a69,Tumor,2c79d829-0805-426f-bde9-d7862506dc75,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ACTGAT_S4_L002_R1_001.fastq.gz,339ad507-4d3e-4c9e-ae1c-5a207645cf9b,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_ACTGAT_S4_L002_R2_001.fastq.gz,6af2d835-4e67-4d59-9cf9-da8f0d197f44,Tumor,e8e85df1-c1e5-4a83-a5d8-1d35e2cf6351,YES,PASS
+S041,C3L-00963,No,2,8,129C,C3L-00963-01,CPT0016490003,Tumor,No,Other_specify,NA,50 % or more,Serous,YES,Normal,pT1b (FIGO IB),pNX,Staging Incomplete,Staging Incomplete,Stage I,IB,1,34.89,59,Yes,White,Not reported,Female,"Other, specify",along anterior and posterior surface,Unifocal,2.6,Positive,-1,Positive,-1,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of expression,Vimentin Negative HNF 1 Beta Negative WT 1 Focally positive,Cannot be determined,1,0.005209019,0.06407184,0.101627765,0.035251651,0.115537488,0.003356582,1.18E-09,0.674945653,0,0.007246349,0,0.078174173,0,0.190698911,0,0.044134912,0,0,0,0.10639045,0.002564539,0,0.050662903,0.152417514,0.005672928,0.07376121,0.052446093,0,0,0.045830018,0.81,3781.400158,6255.195762,10036.59592,0.662501033,-0.94,-0.601,0.02,-1.54,1.14,0.42,0.48,0.27,0.14,0.9,-0.99,-0.58,1.15,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0.248423835,0.095571936,CNV_HIGH,2.73147802,-0.003401977,-0.000550972,-0.011154762,0.020039557,0.162623864,0.046533793,0.221445079,0.279277989,0.499276932,MSS,No,WT,WT,1.277478143,1.201828046,0.168435383,5.554588852,5.426264755,2.321928095,CNV_high,26.19047619,14.28571429,47.61904762,4.761904762,7.142857143,0,Blood_normal,6bf834f1-56da-43d9-98d1-6430d3011f86_gdc_realn.bam,36a10350-2017-46dc-ae84-9d87036edd79,Tumor,8dd015d6-ae3a-40d5-bc79-798ae6e58d69_gdc_realn.bam,ab63ee96-3de7-40ee-92a4-1ca8a2bca48d,Blood_normal,1fc95228-bfef-4385-8139-e212a669da09,Tumor,5df1af76-66e7-4bf2-af84-e795e172fc9b,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CTTGTA_S2_L007_R1_001.fastq.gz,1041d695-f7af-49a9-a30c-b81115bd4ea7,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CTTGTA_S2_L007_R2_001.fastq.gz,dfa1ae1e-a25a-483a-8e30-29a16c5d9939,Tumor,a4951b84-43a1-4422-89fc-dfe9b6121efb,YES,PASS
+S042,C3L-01246,No,2,7,127N,C3L-01246-01,CPT0080980003,Tumor,No,Other_specify,NA,under 50 %,Serous,YES,Normal,pT1a (FIGO IA),pN0,Staging Incomplete,Staging Incomplete,Stage I,IA,0,39.14,62,No,White,Not reported,Female,Posterior endometrium,NA,Unifocal,2.3,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,1,0.002196903,0.171428644,0.062381378,0.031593671,0.114757023,0.003184432,8.60E-09,0.614457941,0.03118917,0,0,0.015260895,0,0.079155671,0,0.023992102,0,0,0,0.024890194,0.041579984,0.008591921,0.060980028,0.0947925,0.017781494,0,0.05178604,0,0,0,0.45,5711.460988,4979.833929,10691.29492,0.480357661,-1.06,-0.484,0.23,-0.82,-0.03,0.62,-0.69,0.58,1.08,-0.45,-0.8,0.2,-2.16,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0.299821411,0.323246107,CNV_HIGH,2.835751595,-0.025528506,0,-0.160785654,0.028202121,0.203619111,0.002579581,0.11312731,0.209490344,0.677382346,MSS,No,WT,WT,1.530342916,1.499080695,0.086674411,5.95419631,5.906890596,1.584962501,CNV_high,13.55932203,22.03389831,40.6779661,8.474576271,1.694915254,13.55932203,Blood_normal,69f5bb55-732f-438f-bd16-8a465e97d464_gdc_realn.bam,e51f6394-96cc-4d48-954b-a578ccea4a6b,Tumor,a1aa4887-909b-4409-a7cb-96399f56632b_gdc_realn.bam,67ae0bf3-c217-40d2-9e0f-031c88d3d165,Blood_normal,2dac5fa1-c56c-4425-ba61-8270014816ca,Tumor,989fb585-18b1-4e94-8d2d-48cc86534d73,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_CTTGTA_S20_L005_R1_001.fastq.gz,150efab8-0b73-4f1e-be49-e317b0d739ee,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_CTTGTA_S20_L005_R2_001.fastq.gz,f5b7c2a1-9ed4-451e-880b-9ad37fda8ddb,NA,NA,YES,PASS
+S043,C3L-01247,Yes,3,10,128N,C3L-01247-01,CPT0080700003,Tumor,No,Other_specify,NA,50 % or more,Serous,YES,Low,pT1b (FIGO IB),pN1 (FIGO IIIC1),Staging Incomplete,Staging Incomplete,Stage III,IIIC1,1,38.44,63,No,White,Not reported,Female,"Other, specify","If Other, specify: Tumor occupies 100% of endometrial surface",Unifocal,11,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,1,0.003136131,0.270098168,0.085891909,0.057038104,0.255350012,0.003692105,1.53E-09,0.324793568,0.020459611,0,0,0.038044018,0,0.145117849,0,0.02712678,0,0.032517355,0,0.046517276,0.027718393,0,0.04266191,0.126920318,0.043941888,0,0.093350416,0,0.005624186,0,0.65,6791.999903,6499.908029,13291.90793,0.125566453,0.4,0.812,-0.76,-1.25,0.11,-0.46,-0.42,-1.62,2.03,-0.47,0.72,-2.07,-0.54,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0.202564688,0.250547398,CNV_HIGH,NA,-0.00384073,-0.01101206,-0.029228366,0,0,0.004175319,0.097496233,0.090973238,0.811530529,MSS,No,WT,WT,0.580489024,0.580489024,0,4.087462841,4.087462841,0,CNV_high,25,12.5,31.25,25,6.25,0,Blood_normal,75350a75-f96e-4de2-a8df-44298992a35d_gdc_realn.bam,7e9c5ca9-4e5b-475a-b3f9-beb59939743b,Tumor,18a5e367-653a-430f-95b3-4d6b504e6c55_gdc_realn.bam,faab2e37-4298-4c8d-9e97-f916b328eb3a,Blood_normal,9a71ebf5-d978-4980-8894-e3b597321b3f,Tumor,f8d1546e-8793-48a7-9853-31626061778f,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_TAGCTT_S18_L005_R1_001.fastq.gz,20f8e780-92e0-43e6-b7a4-6e84f39235c8,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_TAGCTT_S18_L005_R2_001.fastq.gz,340e18d4-ab3f-4a19-8efa-19a46955ea2f,Tumor,e1074569-5cd3-4f9f-9c26-770fd0f849ea,YES,Failed
+S044,C3L-01248,No,1,1,130C,C3L-01248-01,CPT0080300003,Tumor,No,Other_specify,FIGO grade 1,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pNX,Staging Incomplete,Staging Incomplete,Stage II,IB,0,59.78,42,No,White,Not reported,Female,Posterior endometrium,NA,Unifocal,6.3,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,1,0.004866398,0.396427238,0.07116207,0.041579539,0.133829893,0.003523899,4.37E-08,0.348610921,0.014830874,0,0.046343582,0,0,0.124923504,0,0.061050076,0,0,0,0.054248224,0,0,0.016167916,0.123462338,0.004972786,0,0.097073479,0,0.00128331,0.105643912,0.65,6134.476114,6239.261385,12373.7375,0.210133745,-0.768,0.428,0.58,-0.02,-0.65,-0.03,0.17,-0.35,1.68,0.71,0.39,-1.07,1.14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.062727583,CNV_LOW,0.112300954,-0.002417106,-0.000334779,-0.003088754,0,0,0,0.164944633,0.100259031,0.734796336,MSS,No,WT,WT,0.42283286,0.207641466,0.245810227,3.584962501,2.584962501,2.807354922,CNV_low,20,20,40,20,0,0,Blood_normal,4524e78f-96c8-41f1-a6a8-e96abbd54c27_gdc_realn.bam,2b85a1a0-d4be-4eb0-9424-4ad4d38c853e,Tumor,e014ed4a-7325-4e39-b236-f63b6a17e319_gdc_realn.bam,ec333f4b-3db0-4c11-a38f-4795b11bb47f,Blood_normal,9aae580f-d6ae-4920-8e2f-a249be96c57f,Tumor,3aa26966-41cf-4ab9-9446-0e19fa01b054,Tumor,170908_UNC31-K00269_0080_BHLJ52BBXX_CCGTCC_S24_L006_R1_001.fastq.gz,f19a6801-b116-4e0d-b755-5f72f604f648,Tumor,170908_UNC31-K00269_0080_BHLJ52BBXX_CCGTCC_S24_L006_R2_001.fastq.gz,848dfac4-6067-4a87-8e2c-887b5e4a3272,Tumor,cb40d16a-6ca7-4e7d-8d2b-9266a76d0a4d,YES,PASS
+S045,C3L-01249,No,1,2,128C,C3L-01249-01,CPT0080880003,Tumor,No,Other_specify,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,Staging Incomplete,Staging Incomplete,Stage I,IA,0,38.89,65,No,White,Not reported,Female,"Other, specify",Tumor occupies 75% of endometrial surface,Unifocal,6.5,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,1,0.005506597,0.082586753,0.128255657,0.022851227,0.133562549,0.011554706,5.71E-09,0.615682507,0,0,0.058490698,0.047550012,0,0.186223789,0,0.034617457,0,0,0.019629646,0.00418142,0,0.106226948,0.018306136,0.213574815,0.02868622,0.02101373,0,0.014757236,0,0.266741893,1.02,4435.707788,7668.0101,12103.71789,0.295209837,-0.546,-0.936,1.66,0.31,0.44,1.23,1.37,0.66,0.72,1.72,1.47,-0.31,2.05,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.02005336,4.15E-05,CNV_LOW,0.108108442,-0.001843017,-0.002387141,-0.005517554,0.000652912,0.001237317,0.00079462,0.413698802,0.293810101,0.292491097,MSS,No,WT,WT,1.182279033,1.05907234,0.245810227,5.392317423,5.169925001,2.807354922,CNV_low,20,5.714285714,62.85714286,5.714285714,2.857142857,2.857142857,Blood_normal,a9dfb9b8-ccc8-4c7f-8658-9bc8098e270e_gdc_realn.bam,806818d2-ac05-4017-a2f5-56b1cd9f8c22,Tumor,03268ce1-fb36-4573-ae32-127e717098ac_gdc_realn.bam,cf4aa5f9-c779-4b63-a58a-11b5bcf1365c,Blood_normal,04c9b457-112e-4e52-ac06-eac47fa8a994,Tumor,cfac3e22-503c-461b-82b7-95451aa8dcf3,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_GGCTAC_S17_L005_R1_001.fastq.gz,5bdc0383-216a-479a-b641-502eed2b241a,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_GGCTAC_S17_L005_R2_001.fastq.gz,1e55bad8-2e97-4693-9859-81d5834c997f,Tumor,bf806330-6f4c-44e9-a142-609db29497cd,YES,PASS
+S046,C3L-01252,No,1,1,128C,C3L-01252-01,CPT0080610003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,Staging Incomplete,Staging Incomplete,Stage I,IA,0,38.41,76,Yes,White,Not reported,Female,Posterior endometrium,NA,Unifocal,0.9,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,4 or more,0.006402477,0.137741651,0.158318728,0.038989869,0.12208323,0.005223646,2.99E-09,0.531240397,0.010769803,0.001,0,0.036872541,0,0.222912019,0,0.01494661,0,0.040490228,0,0.033949534,0.02840839,0,0.015098039,0.248352302,0,0.055190295,0.032160254,0.002601277,0,0.027284937,0.77,4965.604393,6434.325792,11399.93018,0.339926771,1.1,0.316,0.27,-1.26,0.08,0.36,0.66,-2.38,0.74,0.65,0.03,-0.94,1.29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.003030803,0.018025661,CNV_LOW,0.506601471,-0.002370195,-0.008578778,-0.019031371,0.007044113,0.004247474,0.004817637,0.164304785,0.39622699,0.439468225,MSS,No,WT,WT,3.981739572,3.542277219,2.364182099,8.903881846,8.430452552,7.076815597,CNV_low,12.79069767,1.453488372,68.89534884,9.593023256,4.651162791,2.61627907,Blood_normal,a7a105df-d4f7-4bef-87b3-01819d8bcf0b_gdc_realn.bam,ca598d4d-9330-4293-9d63-45a5a19c02db,Tumor,006c3a0a-73c6-45b5-9520-cffaf0a8cbff_gdc_realn.bam,fcd8e9ca-8399-4824-9e79-e6c582f7dc53,Blood_normal,b738031e-17f5-4a3d-b10e-963d5cc0eab8,Tumor,0a71692b-69f2-4d45-9740-59e1b1025326,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_GATCAG_S19_L005_R1_001.fastq.gz,d36d42d2-afb4-4098-bac2-2c0d503c2f67,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_GATCAG_S19_L005_R2_001.fastq.gz,992384ee-8c0e-4c30-8732-561df93e281a,Tumor,c90896d0-97cb-4a71-b6f8-1be802fee673,YES,PASS
+S047,C3L-01253,Yes,3,10,131,C3L-01253-01,CPT0080400003,Tumor,No,Other_specify,FIGO grade 2,50 % or more,Endometrioid,YES,Low,pT1b (FIGO IB),pN0,Staging Incomplete,Staging Incomplete,Stage I,IB,1,31.83,77,Yes,White,Not reported,Female,"Other, specify",Both anterior and posterior endometrium are remarkable for tumor,Unifocal,5,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,2,0.006663685,0.083032527,0.100995526,0.06542584,0.249481246,0.00262345,1.15E-08,0.491777716,5.00E-04,0,0.005305717,0.097284872,0,0.033678849,0,0.006521843,0,0,0,0.015883132,0.011483769,0,0.006033839,0.112393435,0,0.006194443,0.064674674,0,0,0,0.36,6515.371607,5528.531387,12043.90299,0.120884628,1.06,0.449,-2.52,-1.37,-0.18,-1.6,-1.25,-2.56,1.21,-1.52,-0.22,-1.83,-0.37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.51771591,0.488554072,CNV_HIGH,NA,-0.000733573,0,0,0,0.000419021,0.002139666,0.021332253,-7.99E-18,0.978667747,MSS,No,WT,WT,0.086674411,0.086674411,0,1.584962501,1.584962501,0,CNV_high,0,0,50,50,0,0,Blood_normal,0ccc9616-79cc-4ff5-b33b-2eac15b4078d_gdc_realn.bam,b0341a83-609c-4901-ba96-f76af59d0ec9,Tumor,a22d7330-2df5-4cad-9469-65de69d006eb_gdc_realn.bam,b35bb305-3e5f-450b-b7d7-53063757be27,Blood_normal,b5ce9048-1883-4704-9b11-e8ac3738a5e9,Tumor,dc7fc77c-0ce3-4ae4-9800-583943850fe9,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_ACTTGA_S24_L006_R1_001.fastq.gz,947060cd-6c80-47b7-8330-3b0d802d9d50,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_ACTTGA_S24_L006_R2_001.fastq.gz,430e7fee-c564-4b6d-9c18-3249e79c57d1,Tumor,3229853d-cb9d-4a3f-8f68-f47a20af074e,YES,Failed
+S048,C3L-01256,No,1,3,129C,C3L-01256-01,CPT0073450003,Tumor,No,Other_specify,FIGO grade 1,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,Staging Incomplete,Staging Incomplete,Stage I,IB,0,34.37,75,Yes,White,Not reported,Female,Anterior endometrium,NA,Unifocal,4.3,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,4 or more,0.006931651,0.10085879,0.117115467,0.045018409,0.091572847,0.005441517,4.85E-09,0.633061315,0.015292295,0,0.041585333,0.108651861,0,0.192326822,0,0,0,0,0,0.029434116,0.051242322,0,0.00987097,0.165327623,0.059757333,0.006758145,0.051687953,0.005498505,0,0.042566719,0.78,5548.489367,6298.298259,11846.78763,0.351910974,0.321,0.622,-0.99,-0.88,-0.14,-0.88,0.34,-0.53,0.89,0.54,1.19,-1.64,0.25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.017570882,0.047402035,CNV_LOW,0.274624621,-0.006828509,-0.002305824,-0.005376077,0,0,0,0.138124857,0.144692723,0.71718242,MSS,No,WT,WT,1.121990524,1.037474705,0.168435383,5.285402219,5.129283017,2.321928095,CNV_low,20.58823529,2.941176471,58.82352941,5.882352941,2.941176471,8.823529412,Blood_normal,2df1178d-478f-43e4-9bb3-69c9cb9bc38e_gdc_realn.bam,709a7bc9-9f91-46a9-99e0-0f1f5c8879a1,Tumor,cb2e92de-4d9e-4273-bb97-d5310fd76b5b_gdc_realn.bam,e3ac065a-67d7-4f3d-a9ad-ea2a35806d87,Blood_normal,01b7fb65-f024-4aaa-8f7c-ae8d5457633a,Tumor,dfc183e0-7cd4-4b7d-b213-902bb7a9daee,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_ATGTCA_S23_L007_R1_001.fastq.gz,aafd76a4-f0d2-4e99-bc97-34a7f2c06846,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_ATGTCA_S23_L007_R2_001.fastq.gz,9c0ad385-aa72-4df1-a53b-57f77602a7d7,Tumor,60fc4dee-b19e-453c-9b74-a0f85adcc723,YES,PASS
+S049,C3L-01257,No,2,7,128N,C3L-01257-01,CPT0073520003,Tumor,No,Other_specify,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,Staging Incomplete,Staging Incomplete,Stage I,IA,0,17.11,71,No,White,Not reported,Female,"Other, specify",Tumor involves 75% of endometrial cavity per diagnostic pathology report,Unifocal,8,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,2,0.007955392,0.030422016,0.136826018,0.035453503,0.063623215,0.007146413,2.80E-09,0.71857344,6.00E-04,0.014150682,0,0.109470307,0,0.155098137,0.062742251,0.081580939,0.00442106,0.022112668,0,0.027640251,0,0.145158987,0.070700628,0.279895555,0.006471643,0,0,0,0,0,0.98,3346.731612,7805.287716,11152.01933,0.743161118,0.0343,0.758,-0.75,0.83,-0.83,0.34,0.83,0.25,-1.44,0,1.14,-0.39,-0.58,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.020458729,0.014393735,CNV_LOW,0.56063435,-0.005789059,-0.006350382,-0.013075595,0.001938268,0.003173341,0.004985567,0.305938879,0.570248414,0.123812708,MSI-H,No,MS_indel,Frame_Shift_Del,4.093231531,3.576376116,2.618078608,9.022367813,8.46760555,7.383704292,MSI-H,12.18130312,5.949008499,63.45609065,15.01416431,1.416430595,1.983002833,Blood_normal,3db8446e-2099-484b-8423-a49189910b49_gdc_realn.bam,fd6b0c0e-dba4-47e8-aa1a-24b2a702de0e,Tumor,e2d35a3b-cb54-4f5f-a99f-5453033ff93f_gdc_realn.bam,bba2fa86-ecf4-484d-b7f8-d59cf9dd055c,Blood_normal,54d040a9-00d4-4a18-bad6-bdeaa1a758ba,Tumor,6caaa45a-4ac6-479c-adb2-e947066663f7,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_AGTTCC_S14_L004_R1_001.fastq.gz,7c9d4661-f204-4d60-b0da-9de70a2d29b7,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_AGTTCC_S14_L004_R2_001.fastq.gz,b802ec84-9a75-4c16-bf86-e3b4e2e29561,Tumor,6aff5f34-7ff5-46dc-bc4a-0a1c8f26c66a,YES,PASS
+S050,C3L-01275,No,3,9,129C,C3L-01275-01,CPT0073730003,Tumor,No,Other_specify,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,Staging Incomplete,Staging Incomplete,Stage I,IB,1,32.06,65,No,Not Reported,Not reported,Female,"Other, specify",100 PERCENT OF ENDOMETRIAL SURFACE INVOLVED,Unifocal,5,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,Unknown,0.005730778,0.109255916,0.119170745,0.027176374,0.148295618,0.005005061,2.25E-09,0.585365506,0.019563355,0,0.050302654,0.029312715,0,0.117139625,0,0.033005898,0,0.023439594,0,0.02729506,0,0.11382124,0.019701249,0.18378076,0.016770321,0.008110889,0,0.129636775,0.015382924,0.042736941,0.83,5035.794842,6267.438479,11303.23332,0.447122775,-0.0349,-0.34,2.14,0.35,-0.57,2.01,1.36,-0.48,0.96,1.68,0.33,0.07,0.92,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0.001338495,0.000348114,CNV_LOW,0.081096258,-0.00225703,0,-2.98E-05,0.000879966,0.003380363,0,0.178791861,0.38263115,0.438576989,MSI-H,No,WT,WT,5.123913178,4.570184685,3.59866904,10.09671515,9.523561956,8.491853096,MSI-H,13.19727891,2.176870748,58.0952381,21.36054422,2.993197279,2.176870748,Blood_normal,069a9d12-aaae-41f8-bcb3-4b2de10d0aec_gdc_realn.bam,8b94a041-a634-4162-8b99-11f5a880e871,Tumor,a9da5378-f198-4cde-a619-1b7834af72a6_gdc_realn.bam,dfa0615b-3d51-4baa-9381-5d7bd4140b88,Blood_normal,269d555e-a19b-42bf-81bf-4e0c26c978a4,Tumor,d3bcdff3-cc7d-4d33-8256-862a2135fca6,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GGCTAC_S1_L006_R1_001.fastq.gz,4b44bd70-9e38-452a-878f-4875577eaa2c,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GGCTAC_S1_L006_R2_001.fastq.gz,555f14f3-dae8-486d-8306-df6603731765,Tumor,609f19ba-1626-4b7f-a886-19a54da1c95d,YES,PASS
+S051,C3L-01282,No,1,4,128C,C3L-01282-01,CPT0077650003,Tumor,No,United States,FIGO grade 3,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,1,31.96,64,No,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Unifocal,3,Cannot be determined,NA,Cannot be determined,NA,Loss of nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of nuclear expression,Cannot be determined,NA,Present,Unknown,0.006071288,0.051567513,0.116252847,0.03522931,0.095754957,0.004295623,1.60E-09,0.69082846,0.008889822,0,0,0.145274304,0,0.151125497,0,0.073799185,0.00906967,0,0,0.05385969,0.023889679,0.027204716,0.070048837,0.168158407,0,0.002996522,0.079300673,0,0.016382998,0,0.83,3702.652027,5760.986958,9463.638986,0.613785706,0.209,-0.0404,-0.31,-1.64,1.01,-0.22,-0.06,-0.7,-0.1,-0.55,0.09,-0.51,-0.24,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.023242954,0.085416885,CNV_LOW,1.542454073,-0.003600071,-0.044166784,-0.050706515,0.566440807,0,0,0.153605478,0.502674298,0.343720225,MSI-H,No,WT,WT,4.964246973,4.530149616,3.187681472,9.932214752,9.481799432,8.038918989,MSI-H,15.96638655,1.820728291,47.75910364,24.92997199,5.322128852,4.201680672,Blood_normal,c7d1a1e3-525d-4ba3-85fe-f27b9c31a232_gdc_realn.bam,66b11653-2708-4457-a500-f5c599cdc2d0,Tumor,bdb69ab0-ab7e-42b7-a661-ddf5a6001729_gdc_realn.bam,17017778-73ee-4163-8c32-767f45e148d9,Blood_normal,2b3e454f-b10f-4fa3-be38-5be8b26150e4,Tumor,58cbc0e7-4448-4ecc-807e-f7386431c790,Tumor,170908_UNC31-K00269_0080_BHLJ52BBXX_AGTCAA_S23_L006_R1_001.fastq.gz,1055a2ed-0ffd-4a47-b98d-31f79271fecc,Tumor,170908_UNC31-K00269_0080_BHLJ52BBXX_AGTCAA_S23_L006_R2_001.fastq.gz,0d5983fb-0657-4a02-8a14-3606ea81b0d8,Tumor,1c741e52-cd71-4814-bb62-52ce7e4cdf2f,YES,PASS
+S052,C3L-01284,Yes,3,11,131,C3L-01284-02,CPT0079570003,Tumor,No,NA,NA,NA,Carcinosarcoma,YES,Normal,NA,NA,NA,NA,NA,NA,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.569,-0.451,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,NA,NA,NA,NA,0,-0.00271626,-0.002577586,0.000548697,0,0,NA,NA,NA,MSS,No,WT,WT,1.015548827,0.993284573,0.043988012,5.087462841,5.044394119,1,NA,12.5,12.5,59.375,12.5,0,3.125,Blood_normal,3d32ea57-7403-4d6e-ad7a-c86faa54393d_gdc_realn.bam,7725038c-3433-45d5-bda4-1b5dd52a9b87,Tumor,f36e8c4a-0fc4-4b27-9629-d4838f7ba815_gdc_realn.bam,c5047b0d-9e91-41d3-bef5-ba35d086456d,Blood_normal,bd10e94b-b778-44a2-a1af-3caf087d274f,Tumor,a9b52bf6-4681-41a9-a3c9-5c13609de510,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GCCAAT_S18_L005_R1_001.fastq.gz,60c35ad5-8f48-49d5-b0a1-fb2f195c929d,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GCCAAT_S18_L005_R2_001.fastq.gz,d69c5f9f-e4a8-4596-82e9-53b579863bda,Tumor,d0e638e0-fb56-4706-8edc-50d05fcec0c2,YES,PASS
+S053,C3L-01304,No,1,4,129C,C3L-01304-01,CPT0064210003,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,41.44,68,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior,Unifocal,3.7,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.00208603,0.045042235,0.101301135,0.169993489,0.109129131,0.00481661,4.92E-10,0.567631369,0,0,0.039919707,0.093416052,0,0.145822992,0.025389225,0.021802422,0,0.037699444,3.00E-04,0,0,0.02503175,0.077019546,0.202442392,0,0,0.071131317,0,0,0,0.74,3554.390593,6691.117149,10245.50774,0.524013536,-0.0754,0.608,-0.17,-1.19,0.1,-0.29,-0.66,-0.14,-0.39,-1.09,-0.63,-0.57,0.26,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,7.60E-05,0.000147956,CNV_LOW,0.658740375,-0.002380017,-0.001064044,-0.000937405,0.014794003,0.013065794,0.012025851,0.220275126,0.453568112,0.326156762,MSS,No,WT,WT,1.295786701,1.182279033,0.245810227,5.584962501,5.392317423,2.807354922,CNV_low,29.26829268,0,53.65853659,9.756097561,7.317073171,0,Blood_normal,3f01a5d9-4b5b-4aa5-a6db-3d2786936bde_gdc_realn.bam,abdfac63-1c07-4f0b-878b-e230e90fa27a,Tumor,3866d5e5-efed-427b-bc34-b5a9bb070eb6_gdc_realn.bam,3df8fea4-d4ec-484d-a89c-33fe123dedc8,Blood_normal,105d699b-2118-4678-8fe5-c056e33b374b,Tumor,f61a20f0-253a-4b03-822e-17e43cd61cd4,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_GTGGCC_S21_L006_R1_001.fastq.gz,2c0d1a81-0278-4dcf-99b1-8b846369e7a2,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_GTGGCC_S21_L006_R2_001.fastq.gz,06f5c317-1033-4c7a-b439-095b129762d6,Tumor,44aa28bf-2dbf-4a1a-a220-095a7a2d0d31,YES,PASS
+S054,C3L-01307,No,1,4,127C,"C3L-01307-01,C3L-01307-03",CPT0064290004,Tumor,No,United States,FIGO grade 3,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM1,pM1,Stage IV,IVB,1,31.63,74,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior,Unifocal,3.5,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,3,0.012745927,0.069778323,0.190886091,0.06036526,0.131387928,0.003685652,3.73E-09,0.531150815,0,0.028690669,0.055970953,0.057892742,0,0.143980157,0,0.021676235,0.021983045,0,0,0.037878075,0,0.141429525,0.016577828,0.154202848,0,0.003833063,0.026757166,0,0,0.009127693,0.72,3821.608966,5924.849456,9746.458422,0.612209667,-0.0648,-0.344,-0.11,-0.37,0.72,-1.69,0.08,0.88,0.14,-0.07,0.11,-0.24,-0.49,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.281230088,0.009184245,CNV_HIGH,2.928653664,-0.001479407,0,-0.206222747,0.089327755,0.089442964,0.036590969,0.224253983,0.423044883,0.352701134,MSS,No,WT,WT,1.121990524,1.037474705,0.168435383,5.285402219,5.129283017,2.321928095,CNV_high,5.882352941,26.47058824,35.29411765,11.76470588,14.70588235,5.882352941,Blood_normal,40844e74-4b1b-46ff-a4ea-d8f743dce828_gdc_realn.bam,88cb812e-8dbb-4315-b15d-2209fadb6c43,Tumor,c764cdc8-80c6-4b7a-a170-309fe582f17e_gdc_realn.bam,25baf71c-839d-4432-9960-59f47e1c37d7,Blood_normal,ac906496-2d2a-4621-b8f6-c4ee338878d8,Tumor,ae515f49-8fe2-4239-a18a-241d6cb28f2d,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_GTTTCG_S23_L006_R1_001.fastq.gz,7eae300f-fe7f-4059-876c-659d575a8f34,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_GTTTCG_S23_L006_R2_001.fastq.gz,9ad113e3-3bed-4906-a9d0-adc6e81379b4,Tumor,c9019ece-4b77-4014-805d-3aa888d9df7b,YES,PASS
+S055,C3L-01311,No,1,3,128N,C3L-01311-01,CPT0077770003,Tumor,No,United States,FIGO grade 1,Not identified,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,37.11,55,No,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Unifocal,3,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,None,0.004215926,0.109470974,0.101517641,0.06007514,0.143556921,0.006587758,4.74E-09,0.574575636,0.005798878,0,0.022069207,0.109550741,0,0.077021852,0.032685341,0.079008992,0,0.044567053,0,0.054539991,0,0.031163318,0.076748995,0.104517526,0.003387154,0,0,0.008075333,9.00E-04,0,0.65,4282.074991,6666.469204,10948.54419,0.478476847,-0.296,0.383,0.89,-1.73,0.83,0.73,0.16,-0.35,-0.08,-0.42,-0.13,-0.11,-0.4,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0.019980775,0,CNV_LOW,0.18174419,-0.01793059,0,0,0.002070001,0.005535154,0.01129473,0.111994798,0.59072105,0.297284152,MSS,Yes,WT,Missense_Mutation,6.473523558,6.465459001,0.580489024,11.47116713,11.46301341,4.087462841,POLE,33.98299079,0.212615167,44.22395464,6.236711552,1.240255138,14.10347271,Blood_normal,20730bb0-a420-4cc3-91ea-80c765ccd3cf_gdc_realn.bam,1186aea4-bfdd-45ef-bc0a-683c289fd617,Tumor,b56bd2bb-ba83-4a80-835e-1c42b477fb0a_gdc_realn.bam,b56d2614-afdd-4be1-8629-69a7cf24d6ce,Blood_normal,d373e6d9-c4a8-4c0b-953d-49aa71477f7f,Tumor,9b170866-2a3a-4ad9-bc1e-cf0d2804648c,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_ATTCCT_S12_L003_R1_001.fastq.gz,391b8dfd-1857-41ff-8af2-ea0830668f60,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_ATTCCT_S12_L003_R2_001.fastq.gz,5b4f8676-cead-4e7a-b932-58fb3d37f3a1,Tumor,85041653-41b9-4890-a61e-f635f4bc5a65,YES,PASS
+S056,C3L-01312,No,2,5,130C,C3L-01312-01,CPT0063670003,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,Staging Incomplete,No pathologic evidence of distant metastasis,Stage I,IA,0,31.96,56,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and Posterior,Unifocal,4,Cannot be determined,NA,Cannot be determined,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Cannot be determined,NA,Cannot be determined,1,0.005863154,0.146545755,0.114493109,0.045626743,0.103001166,0.005458865,1.02E-09,0.579011207,0.016955087,0,0.024427266,0.092732909,0,0.22358277,0,0.010177208,0.009819622,0,0,0.079006224,0.015489192,0,0.024328606,0.208602464,0.007996144,0.003921947,0.017436486,0.005250649,0,0.030273428,0.77,4262.424062,6385.554716,10647.97878,0.359457749,0.19,0.314,0.25,-0.35,0.34,-0.17,0.4,-1.32,-0.06,0.43,0.82,-0.73,1.49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.000224702,0.063228516,CNV_LOW,0.563707282,-0.016165776,-0.015521278,-0.016593224,0,0,0,0.158812029,0.426011719,0.415176252,MSS,No,WT,WT,1.295786701,1.142367912,0.319245674,5.584962501,5.321928095,3.169925001,CNV_low,20.51282051,2.564102564,53.84615385,12.82051282,5.128205128,5.128205128,Blood_normal,21d6d362-118e-4dfe-9130-7aeb1bf82058_gdc_realn.bam,142a53dc-3e84-4a26-8b71-e3c0926a54b7,Tumor,36a47ab1-152f-4078-855e-82e726b1778f_gdc_realn.bam,c03828a8-c41d-4fc6-a3ba-6ec5908a4f7a,Blood_normal,76e906be-7778-4801-9ae9-6200be0649d4,Tumor,5eacf3e4-6cc2-48d0-ac48-cdd70d4f591b,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_TAGCTT_S6_L008_R1_001.fastq.gz,bcbd006a-fd60-43db-905d-aa6e729f314a,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_TAGCTT_S6_L008_R2_001.fastq.gz,f3db77a2-db19-4942-8c2e-3b69f81a956b,Tumor,ef01f3f5-b7f4-4765-a579-804299a31ac2,YES,PASS
+S057,C3L-01744,No,1,3,127N,C3L-01744-01,CPT0093280003,Tumor,No,Other_specify,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,Staging Incomplete,Staging Incomplete,Stage I,IA,0,46.45,62,No,White,Not reported,Female,Anterior endometrium,NA,Unifocal,2.2,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,2,0.007891409,0.030381439,0.131031212,0.044015107,0.054719455,0.003346583,7.56E-11,0.728614796,0.00669145,0,0.01279678,0.162358545,0,0.060144906,0,0.084212413,0.015350342,0,0.02209679,0.002045284,0.02761867,0,0.029584537,0.137049713,7.00E-04,0.007310619,0,0.030856792,0.011223627,0,0.61,1682.151896,5590.277688,7272.429584,0.935807315,-0.473,-1.08,-1.39,0.03,-0.76,-1.06,-0.36,0.9,-1.93,-1.22,-0.58,1.69,-0.52,1,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.01749751,0.000115682,CNV_LOW,0.402217982,-0.016802222,-0.004861285,-0.01133029,0.008267534,0.012597006,0.007949318,0.148952464,0.77235307,0.078694467,MSI-H,No,MS_indel,Frame_Shift_Del_Nonsense_Mutation,5.382393153,5.055718949,3.24062152,10.36194377,10.02652344,8.098032083,MSI-H,12.18809981,1.535508637,60.84452975,19.00191939,4.510556622,1.919385797,Blood_normal,e3b971ec-48d2-431f-ae8b-719525fa1716_wxs_gdc_realn.bam,8b2f6755-245b-40fc-907d-062a356a39f3,Tumor,f52882fc-8b4f-47c1-bd89-bf72d116b7e6_wxs_gdc_realn.bam,e3388e0d-3dd6-4fab-81e0-5fd54ced76b3,Blood_normal,59d5779f-3b81-49d3-9e1e-c081765a3df5,Tumor,551e9aca-4705-4f05-8d2a-8f2dbc11eff7,Tumor,171205_UNC32-K00270_0069_BHN2HGBBXX_ACTGAT_S22_L006_R1_001.fastq.gz,0eefe67a-b297-4c2f-87ad-d25b83e43a2b,Tumor,171205_UNC32-K00270_0069_BHN2HGBBXX_ACTGAT_S22_L006_R2_001.fastq.gz,6d97b258-2fc8-4b10-9c6c-d1e7dac46106,Tumor,25f6a3d7-9619-461e-923d-7276d0ab2925,YES,PASS
+S058,C3L-01925,No,4,15,127N,C3L-01925-07,CPT0113690003,Tumor,No,United States,NA,50 % or more,Serous,YES,Normal,pT3b (FIGO IIIB),pN1 (FIGO IIIC1),Staging Incomplete,pM1,Stage IV,IVB,1,27.66,65,No,White,Not-Hispanic or Latino,Female,"Other, specify",Anterior and posterior endometrium,Unifocal,4.5,Positive,5,Negative,NA,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Intact nuclear expression,Loss of expression,"Ki-67, 57% ; positive for p16 (strong and diffuse), focally positive for CK5/6 and p40",Cannot be determined,None,0.003108123,0.245624092,0.052792773,0.013339386,0.100179506,0.001663694,1.86E-09,0.583292424,0.030012721,0,0.011458756,0.008603041,0,0.115811487,0,0.045137299,0,0,0,0.036534493,0.003605974,0.043674035,0.018902943,0.090333965,0,0.009959128,0.00596616,0,0,0,0.42,4255.959916,3163.142174,7419.10209,0.542145569,-1.46,-2.6,0.88,-0.19,-0.88,1.27,-1.49,0.4,1.67,-1.06,-1.29,0.04,-1.4,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.491125051,0.385955474,CNV_HIGH,5.48245768,-0.001048497,-0.005103458,-0.225720624,0.240745792,0.187988723,0.000426944,NA,NA,NA,MSS,No,WT,WT,1.162461482,1.080351414,0.168435383,5.357552005,5.209453366,2.321928095,CNV_high,22.22222222,19.44444444,36.11111111,13.88888889,2.777777778,5.555555556,Blood_normal,d354fd4a-1f75-4174-b0c9-10575ed3a4d5_wxs_gdc_realn.bam,ffc37f27-76c2-4df1-aaad-ed6f7d8e8ffb,Tumor,45a3f0fe-0662-4dfd-9f5e-f5b62faef683_wxs_gdc_realn.bam,dcefad95-d4c9-4736-b67e-7270c9e16673,Blood_normal,580c3cce-5125-41fd-933b-52017d1279e0,Tumor,70147939-557e-46d7-852f-38e1c1e7be31,Tumor,180508_UNC31-K00269_0128_BHTY7KBBXX_AGTCAA_S23_L006_R1_001.fastq.gz,49190371-ec2a-420f-a804-0931a346d206,Tumor,180508_UNC31-K00269_0128_BHTY7KBBXX_AGTCAA_S23_L006_R2_001.fastq.gz,c8dfbe14-a49c-4838-95c7-c9b9ed81633a,Tumor,78f10f4d-1533-4556-b43c-63bfb3734b02,NA,NA
+S059,C3N-00151,No,4,16,129N,C3N-00151-03,CPT0066620003,Tumor,No,Ukraine,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,cM0,No pathologic evidence of distant metastasis,Stage I,IB,0,27.1,60,Unknown,NA,NA,Female,"Other, specify",Entire uterine cavity,Unifocal,NA,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,2,0.016229609,0.066982605,0.117841991,0.037353248,0.110029945,0.003059245,8.19E-09,0.648503347,0.124378981,0,0.041497227,0.086170937,0,0.178005848,0.016912327,0.096795497,0,0,0,0,0,0.059535132,0.079014882,0.08291912,0,0.005009947,0.018592465,0.019894683,0,0.031272954,0.84,3508.374749,6137.36728,9645.742029,0.655787215,-0.0747,-1.93,1.36,1.98,-0.21,1.69,1.29,0.42,-0.18,1.2,0.93,-0.14,0.44,1,0,0,0,0,1,0,1,1,1,0,0,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,0,1,1,1,1,3.63E-05,3.33E-08,CNV_LOW,0.138302686,0,0,-0.008060267,0.004694148,0.002273465,0,0.227748077,0.560807043,0.21144488,MSS,Yes,WT,WT,8.317776613,8.31441313,0.801600757,13.32685077,13.32347703,4.64385619,POLE,35.05365854,0.253658537,29.45365854,11.22926829,0.917073171,23.09268293,Blood_normal,8252f908-a17d-475a-a94f-88043f2bf9da_gdc_realn.bam,94cf10bc-d882-429b-8083-bd4123f3a4bc,Tumor,73dee08e-268f-4107-b55d-ee1906f078e6_gdc_realn.bam,5246a7b0-58b3-425b-9c84-dfe8a361fc81,Blood_normal,c8094cb6-306f-499b-861c-54292228468d,Tumor,fc38dc22-853e-41e9-a8c6-ab43e06b3179,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_TTAGGC_S8_L008_R1_001.fastq.gz,325918b9-3905-49ff-b4f6-f76a88e30301,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_TTAGGC_S8_L008_R2_001.fastq.gz,31961972-9d79-45d4-a53c-b80875dd8e9a,Tumor,cf69a580-e2e3-4c41-8c07-0650b78e3a08,YES,PASS
+S060,C3N-00200,No,3,9,128N,C3N-00200-02,CPT0017870003,Tumor,No,United States,FIGO grade 2,Not identified,Endometrioid,YES,Normal,pT2 (FIGO II),pN1 (FIGO IIIC1),Staging Incomplete,Staging Incomplete,Stage III,IIIC1,NA,46.85,72,Yes,Black or African American,Not-Hispanic or Latino,Female,"Other, specify",anterior and posterior endometrial cavity,Multifocal,9,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,4 or more,0.003404556,0.437720749,0.069614216,0.034985182,0.080478688,0.003404118,1.24E-08,0.370392479,0.042572483,0,0.00647704,0.074472037,0,0.157712493,0,0.013428672,0,0,0,0.052652388,0.030101489,0,0,0.189955698,0,0.007935982,0.024691718,0,0,0,0.6,4684.861932,4838.564981,9523.426913,0.196347513,0.0985,0.924,-1.37,-0.7,-0.81,-0.78,-1.54,-1.05,1.24,-1.6,-0.18,-1.5,-0.6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.000813962,CNV_LOW,0.172992111,0,-0.004818201,-0.003167805,0.003746224,0.000747792,0,0.025074678,0.322261222,0.6526641,MSS,No,WT,WT,1.162461482,1.080351414,0.168435383,5.357552005,5.209453366,2.321928095,CNV_low,19.44444444,8.333333333,38.88888889,22.22222222,0,11.11111111,Blood_normal,136eb1b9-e1da-4a4d-9637-708e3553ba64_gdc_realn.bam,1d2a42cf-e43c-4ded-8a10-c1c0cc85791a,Tumor,7c591788-4359-4ae7-8dc2-6342e4726baa_gdc_realn.bam,78de42e6-acc0-49ab-8879-7af689f912b0,Blood_normal,636910f0-d39e-4463-abd3-f7dafbbf34fc,Tumor,f5332af5-667a-4788-b6c5-0a2690c79255,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_AGTTCC_S22_L006_R1_001.fastq.gz,c9c50554-d2cc-4a32-a115-38a396a175f3,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_AGTTCC_S22_L006_R2_001.fastq.gz,95fff05f-06dc-4463-a6b9-9cb88669fe24,Tumor,27b931e5-a3dc-4832-ac62-798ba69bec62,YES,PASS
+S061,C3N-00321,No,3,10,130N,C3N-00321-01,CPT0010260003,Tumor,Yes,Ukraine,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,26,64,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,1.5,Positive,80,Positive,30,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – negative,Cannot be determined,2,0.007932213,0.076065268,0.137075172,0.043301072,0.081580986,0.003773438,1.06E-09,0.650271851,0.038086734,0,0.043384893,0.098087625,0,0.303072564,0,0.120657123,0.001645347,0.054679848,0,0.073224264,0,0,0.099611915,0.207091546,0.046680774,0,0.033777368,0,0,0,1.12,3627.437843,6229.435198,9856.873041,0.715241228,0.456,-1.67,-0.41,0.22,0.73,-0.34,-0.19,-1.06,-0.39,-0.7,0.08,-0.29,-0.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0.012143202,0.035379732,CNV_LOW,0.723440323,-0.001605586,-0.004272987,-0.007245738,0.142891225,0.028710501,0,0.198948242,0.529494006,0.271557752,MSI-H,No,WT,Frame_Shift_Del_Nonsense_Mutation,5.050337259,4.572063684,3.371537285,10.02097994,9.525520809,8.243173983,MSI-H,13.85869565,1.222826087,60.73369565,19.29347826,3.260869565,1.630434783,Blood_normal,5af7cd9f-e8dd-49af-ac6a-6badeed9fd8d_gdc_realn.bam,215e2fdf-ca19-4def-9faf-067115d046d4,Tumor,1835754d-479f-4784-9083-921104cb7a68_gdc_realn.bam,e1661ddf-c57f-4d99-b859-53baacfb1010,Blood_normal,3a2749ed-da5e-477a-9cd4-900035dc627b,Tumor,da271d9f-79b1-40d3-8ccb-43eb62566689,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ATCACG_S21_L007_R1_001.fastq.gz,0de8cfbb-adb2-4d1d-91f6-685a03b16449,Tumor,170802_UNC31-K00269_0072_AHK3GVBBXX_ATCACG_S21_L007_R2_001.fastq.gz,cc11b6d3-897f-4158-96a4-91b5ebbbb74e,Tumor,295d4951-f892-48a8-b1a5-eb1851ba67c5,YES,PASS
+S062,C3N-00322,No,1,4,130C,C3N-00322-03,CPT0010430003,Tumor,Yes,Ukraine,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,31,70,No,NA,NA,Female,"Other, specify",Entire Uterine Cavity,Multifocal,2.6,Positive,85,Positive,50,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – positive,Cannot be determined,2,0.004036411,0.022281501,0.093054629,0.03878787,0.038672923,0.001245467,2.01E-09,0.801921196,0.008844079,0,0.029447879,0.024497424,0,0.034506425,0,0.001711434,0,0,0,0.010084213,0,0.007519195,9.00E-04,0.047962606,0.002492303,0,0.032059529,0,0,0,0.2,2569.07483,3671.485686,6240.560516,0.625552677,0.548,0.371,-2.32,-0.05,-1.21,-0.87,-1.93,-1.48,-0.9,-1.42,-0.51,0.17,-0.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.078502133,0.073526732,CNV_LOW,0.700120616,-0.001714734,-0.002022706,-0.014190813,0.274251993,0,0,0.035279107,0.397792557,0.566928335,MSS,No,WT,WT,1.797751858,1.663200075,0.389123498,6.339850003,6.14974712,3.459431619,CNV_low,20,2.857142857,62.85714286,7.142857143,4.285714286,2.857142857,Blood_normal,57e730bf-2e32-4cf2-907b-f191778b6783_gdc_realn.bam,4989f40e-696b-41f9-bf68-f0b89e76f9b8,Tumor,dfc8a271-3680-4e17-a894-06841e360ef2_gdc_realn.bam,a60a9f62-9124-4a9d-8c28-1df22de7e053,Blood_normal,c44027d8-b828-4d83-b821-f9c8b1dcd844,Tumor,f3d0e5ea-1ad6-444e-884f-d198aab1f134,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_L008_R1_001.fastq.gz,86f3b033-6491-4d96-bf41-f51ddfc6bb5b,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_L008_R2_001.fastq.gz,258cde18-4abd-4a5f-8140-23f1da45abdc,Tumor,1dd7ae14-7d99-44f5-9361-078cdd43606a,YES,PASS
+S063,C3N-00323,No,2,7,130C,C3N-00323-02,CPT0010610003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,27,78,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,4,Positive,90,Positive,20,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – positive in myometrium,Cannot be determined,Unknown,0.006382884,0.061490059,0.148799843,0.050010993,0.12871576,0.002939082,4.63E-09,0.601661375,0,0,0.029467374,0.068410488,0,0.140721701,0,0,0.011763169,0,0.037773189,0,0,0.184708541,0.037899661,0.153705136,0,0,0.045550741,0,0,0,0.71,3740.58003,5246.146449,8986.72648,0.660090527,0.755,-1.15,-0.46,0.64,0.24,-1.34,-0.73,0.55,0.15,-0.78,0.99,-0.38,0.39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.035623529,0.224999076,CNV_LOW,1.624050731,-0.00977493,-0.041093192,-0.007796096,0.190078055,0.051347771,0,0.210719013,0.356328585,0.432952402,MSS,No,WT,WT,1.560942049,1.514796483,0.128134019,6,5.930737338,2,CNV_low,15,5,66.66666667,3.333333333,6.666666667,3.333333333,Blood_normal,577c96fe-53aa-4c2d-9410-263e818b88d0_gdc_realn.bam,37e704ce-f3af-4ef0-ad64-d94fad73aa39,Tumor,5b342ae2-f869-4fb5-af0d-556559e43e54_gdc_realn.bam,ac736b73-851b-457f-a05b-ccd1cda0cf77,Blood_normal,f14df642-0372-4fce-98ae-e94369a425cd,Tumor,81a11798-0812-4fda-81fb-43b01a15f562,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_CGATGT_S6_L008_R1_001.fastq.gz,6c14578c-92ff-4085-a8d8-d5122ff4fd11,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_CGATGT_S6_L008_R2_001.fastq.gz,727a9891-1116-4c77-9787-a08111f0b3e8,NA,NA,YES,PASS
+S064,C3N-00324,No,3,11,130C,C3N-00324-04,CPT0010820003,Tumor,Yes,Ukraine,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT2 (FIGO II),pN0,cM0,Staging Incomplete,Stage II,II,0,35,66,Yes,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,2.3,Positive,80,Positive,70,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive. Vimentin (Vim3B4) – positive.,Cannot be determined,Unknown,0.004926435,0.040904304,0.123258586,0.039417667,0.046900078,0.006079859,6.37E-09,0.738513065,0.01866044,0,0.001911367,0,0,0.292386957,0,0.014451686,0.002204626,0,0.017253413,0.018403697,0.013078051,0.045221132,0.019022007,0.182363781,0.065752505,0.049617216,0,0.013628498,0.010695738,0.045348883,0.81,3563.533329,6318.746917,9882.280246,0.633737159,-0.0921,0.0846,-0.35,0.1,1.52,-0.62,0.17,0.38,-0.91,0.73,0.86,0.58,1.31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.000158276,CNV_LOW,0.816261462,-0.001916029,-0.002163191,-0.001009317,0.027806684,0.034279747,0.037680011,0.212727517,0.418050437,0.369222046,MSS,No,WT,WT,1.732044006,1.576001633,0.42283286,6.247927513,6.022367813,3.584962501,CNV_low,18.75,6.25,54.6875,14.0625,3.125,3.125,Blood_normal,cd24c578-62a3-4d17-bf84-b1c439977cce_gdc_realn.bam,1ce30c13-0c75-412b-86ee-5a951e7efb92,Tumor,5142f7db-1b0f-466b-bcb3-9704940ffdab_gdc_realn.bam,0aeb075f-7fad-4391-8773-a716e053eb2b,Blood_normal,913cfbfa-c656-4161-b42d-7d343b7cffeb,Tumor,417609db-9760-41da-af19-8258e8ab57ea,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_TTAGGC_S21_L008_R1_001.fastq.gz,a76e636b-9fd0-4cf2-b0c6-78ed366d7c3d,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_TTAGGC_S21_L008_R2_001.fastq.gz,ced11262-32d0-42c6-a91f-2d3f801ed551,Tumor,f6876ca4-bf05-46cb-8bf3-c2e1422e4765,YES,PASS
+S065,C3N-00326,No,2,8,130C,C3N-00326-01,CPT0022530003,Tumor,Yes,Ukraine,FIGO grade 1,Not identified,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,35,45,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,1.3,Positive,95,Positive,90,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – focally positive; Vimentin (Vim3B4) – positive,Cannot be determined,2,0.004979518,0.065285664,0.097412193,0.041874395,0.044211258,0.001658171,5.17E-10,0.7445788,0,0,0.024248712,0.040886712,0,0.113361734,0,0.001274594,0.004202812,0,0.034001026,0,0,0.087850691,0.002384896,0.053377612,0.011314816,0.021351223,0,0.024953588,0.001495829,0.01927318,0.44,1840.583053,3796.214544,5636.797597,0.719228044,-0.846,0.474,-1.04,0.49,-0.41,-0.8,-0.82,1.58,-1.73,-0.59,-1.1,0.69,0.22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018708803,0.000119261,CNV_LOW,0.046244824,0,0,0,0.000899662,0.002372009,0.001905127,0.12367529,0.595569786,0.280754924,MSS,No,WT,WT,0.801600757,0.610053482,0.282995148,4.64385619,4.169925001,3,CNV_low,29.41176471,5.882352941,47.05882353,5.882352941,5.882352941,5.882352941,Blood_normal,fe5e1dd1-ea01-4ff5-8019-c8d372a33339_gdc_realn.bam,4971ed33-2da8-4d91-97c1-31564e3e1796,Tumor,3fa66b09-0381-4234-9176-e70c8b4f7338_gdc_realn.bam,dad5a4fb-980f-4fac-a55f-19f43b41478b,Blood_normal,1d0fce2e-e790-4ff1-880b-67ecb7234d4b,Tumor,0dc37b55-fee6-4705-beab-1d4dfa5b3fd4,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GATCAG_S8_L008_R1_001.fastq.gz,fd7cf324-b89a-4558-a03f-d64ef2be88ff,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GATCAG_S8_L008_R2_001.fastq.gz,90917b44-0072-4cec-bd84-9dd8885045f0,Tumor,8fb25398-22bd-440b-9b8f-04559cd909bf,YES,PASS
+S066,C3N-00328,No,3,12,130C,C3N-00328-05,CPT0019840003,Tumor,Yes,Ukraine,FIGO grade 3,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,cM0,Staging Incomplete,Stage I,IB,0,31.22,62,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,3,Positive,90,Positive,70,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – positive;,Cannot be determined,Unknown,0.014894189,0.031289689,0.135733147,0.043499463,0.041931867,0.012644167,6.15E-10,0.720007478,0.003230715,0,0.091752351,0.16848275,0,0.103577841,0.263662345,0.083597209,0,0.004897877,0.080428803,0,0,0.063637704,0.194001873,0.285098088,0.004733789,0,0.002898656,0,0,0,1.35,4369.682108,9711.938731,14081.62084,0.861033458,-0.188,-0.643,0.06,-0.49,2.4,0.34,1.04,1.36,-1.47,-0.02,2.12,-0.82,-1.03,1,0,0,1,0,1,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,0,0,0,1,1,1,1,0.030018893,0.051935749,CNV_LOW,0.759519858,-0.004193273,-0.015025716,-0.02175412,0.166020994,0,0,0.490438133,0.36724791,0.142313957,MSS,Yes,WT,Missense_Mutation,7.571805466,7.567338041,0.667424661,12.57790084,12.57341051,4.321928095,POLE,45.14276337,0.393829997,35.28060387,5.562848704,0.918936659,12.70101739,Blood_normal,290e3a66-502c-40c7-b6cc-e4783b31bf23_gdc_realn.bam,92a6ecef-93d2-4e81-ba10-8c6fa21e412e,Tumor,4aec9b8e-d049-435a-8d3f-e17ee190dd8a_gdc_realn.bam,c1d2d417-5a50-4534-8ae1-fc69aca45772,Blood_normal,bdfcac24-2bb8-471f-ab8b-6fe9f1f10c3b,Tumor,6ba9f990-4d24-43fb-bf5b-1858a0146e7e,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_GCCAAT_S16_L004_R1_001.fastq.gz,1409ef12-3919-4b88-b5b8-4a85576fadc7,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_GCCAAT_S16_L004_R2_001.fastq.gz,fba3daa2-3a66-4af6-aa02-ff5cb8d534eb,Tumor,f18f7aa8-6d8a-4f1b-bf15-809bd5bfb5e5,YES,PASS
+S067,C3N-00333,No,2,5,127N,C3N-00333-03,CPT0011000003,Tumor,Yes,Ukraine,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,31,65,No,NA,NA,Female,"Other, specify",Entire Uterine Cavity,Multifocal,1,Positive,70,Positive,50,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – positive,Cannot be determined,Unknown,0.002296699,0.118953119,0.098801088,0.03362146,0.111089441,0.00527552,1.15E-09,0.629962672,0.016298314,0,0.00704365,0.01497689,0,0.069356038,0,0.009639726,0,0.047553185,0,0.020033554,0.051333479,0.093244702,0.008736583,0.090772415,0.01672796,0.022602173,0.050744634,0,0,9.00E-04,0.52,5520.164395,6014.318353,11534.48275,0.240174621,-0.126,-0.129,0.7,-0.43,-0.32,0.63,0.19,-1.31,1.61,0.66,0.06,-0.73,2.01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0.026753086,0.15183888,CNV_LOW,1.51025813,-0.012304598,-0.033351905,-0.026722719,0.253917096,0.000231892,0.00016282,0.136843014,0.370659592,0.492497394,MSS,No,WT,WT,1.349358266,1.313865823,0.086674411,5.672425342,5.614709844,1.584962501,CNV_low,14.58333333,6.25,60.41666667,6.25,8.333333333,4.166666667,Blood_normal,d6e45feb-9d72-474c-9d5b-a0b8d6c4dc8c_gdc_realn.bam,cf748e44-e144-4034-ad2e-99d431d3be22,Tumor,deec3429-33f4-4946-a550-66acc74ee372_gdc_realn.bam,a8e8fff3-176e-4b6d-a138-ce8079f4951c,Blood_normal,1fdfd239-41aa-4c8d-8b8b-c84d4b525d67,Tumor,3c2dee34-a0e2-4ee0-9ed1-3bb9405c0070,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_TGACCA_S22_L008_R1_001.fastq.gz,46b971bf-81ed-4c18-9272-3e65823123e2,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_TGACCA_S22_L008_R2_001.fastq.gz,ae764c5a-f0ca-4a2e-b08e-ef1a65ddfe20,Tumor,36d5d1e1-8afc-4058-82ad-4893869f23a2,YES,PASS
+S068,C3N-00334,No,4,14,129C,C3N-00334-02,CPT0018570003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,32.83,68,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,1.4,Positive,80,Positive,80,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – positive;,Cannot be determined,1,0.005235246,0.058682364,0.120915156,0.034217712,0.062289497,0.003675535,6.69E-07,0.714983821,0.045750101,0,0.040284527,0.01546921,0,0.207054009,0,0.02143719,0,0,0.019796059,0.009842251,0.009016892,0.036097355,0.013020446,0.127467118,0.049886437,0.009914774,0,0.013023059,0,0.001940573,0.62,3171.067885,5108.003671,8279.071556,0.544691598,0.459,0.454,-0.87,0.36,-0.77,-0.18,-0.14,0.54,-0.71,-0.01,-0.02,0.36,-0.39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,7.14E-05,0.000740447,CNV_LOW,0.623689582,-0.006951058,-0.012053628,-0.023956429,0.016728619,0.017922255,0.010002796,0.150426081,0.534767631,0.314806288,MSI-H,No,MS_indel,Frame_Shift_Del_Frame_Shift_Ins,3.691478851,3.211986209,2.218238939,8.592457037,8.06608919,6.894817763,MSI-H,16.10486891,0.74906367,62.54681648,14.98127341,3.745318352,1.872659176,Blood_normal,20e9a00b-8692-41fd-b34b-3d1028376f13_gdc_realn.bam,5c7c8278-74e6-4c50-9f43-e1977bc3bc4a,Tumor,0080c2ce-959e-42cc-8cbb-9ba2a91410e3_gdc_realn.bam,a6837fa3-b3d6-491e-91eb-e0b72fcbc5d8,Blood_normal,c11b3b83-b681-4e44-a9ac-7dd3a760752c,Tumor,6cc33674-26cb-4b7d-abc1-76081bda58ba,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GGCTAC_S14_L006_R1_001.fastq.gz,d92fcd95-f416-4785-aca7-f8c53a3099b1,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_GGCTAC_S14_L006_R2_001.fastq.gz,18275a18-20bd-45fc-af1e-e340d86a97c6,Tumor,47b66984-bad6-411b-ace2-2bd36575d119,YES,PASS
+S069,C3N-00335,No,2,8,130N,C3N-00335-05,CPT0018790003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,29.52,57,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,2.5,Positive,35,Positive,7,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – negative;,Cannot be determined,None,0.009399873,0.073173287,0.133402048,0.01668522,0.060392349,0.008031966,1.04E-10,0.698915258,0.037397016,0,0.016467675,0.129747623,0,0.094326881,0.097293058,0.024431161,0,0,0.01531505,0.035347052,4.00E-04,0.042293294,0.077466078,0.174081384,0,0,0,0.036358248,0,0.009113625,0.79,5147.204936,8381.068241,13528.27318,0.70875506,-1.76,-0.609,0.99,0.99,0.15,0.24,2.15,2.39,0.33,1.4,1.96,1.42,-0.25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.083306514,0.045518077,CNV_LOW,0.635283831,-0.003293513,-0.004209591,-0.015606792,0.002870901,0,1.68E-05,0.393045827,0.330014565,0.276939607,MSI-H,No,WT,WT,5.271130528,4.843430085,3.447198405,10.24792751,9.807354922,8.326429487,MSI-H,18.32402235,2.458100559,46.70391061,22.79329609,3.463687151,6.25698324,Blood_normal,360ce993-b06e-488c-8d6b-645b93688df0_gdc_realn.bam,30e9f0dd-467c-4332-8d66-bff36f698928,Tumor,27358361-30f1-497e-aec1-95d130c16820_gdc_realn.bam,d6c873cf-c90d-4c3b-8b69-2c51dcf49fa4,Blood_normal,90e40803-c8e8-42d5-892c-31d26341bab7,Tumor,bd8cc186-dc1e-407f-9250-4615d7e84c57,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_CTTGTA_S13_L006_R1_001.fastq.gz,ed41b710-e03a-402f-a5f5-7edd980641ae,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_CTTGTA_S13_L006_R2_001.fastq.gz,71b23a8a-c3a4-4c35-bb58-f03e9e6d8da0,Tumor,90881763-2f8c-48aa-adab-0b7534022949,YES,PASS
+S070,C3N-00337,No,1,4,131,C3N-00337-04,CPT0018960003,Tumor,Yes,Ukraine,FIGO grade 3,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pNX,cM0,Staging Incomplete,Stage I,IB,0,29.07,67,No,NA,NA,Female,"Other, specify",Entire Uterine Cavity,Multifocal,1.3,Positive,80,Positive,40,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – positive; Vimentin (Vim3B4) – positive;,Cannot be determined,1,0.005483621,0.054561115,0.117235002,0.03552117,0.105357622,0.001150593,5.75E-10,0.680690877,0,0,0.043745649,0.016943114,0,0.111390552,0,0.007989276,0,0,0.02417088,0,0.011103314,0.016427891,0.004766039,0.092418353,0,0.081958621,0.003860163,0,0,0.015226148,0.43,2317.879694,3551.918217,5869.797912,0.547735947,-0.282,-1.09,0.59,0.44,0.15,0.91,-0.71,-0.47,-0.18,-0.33,-1.81,0.51,-0.18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0.516647281,0.116898431,CNV_HIGH,4.872899162,-0.000660581,0,-0.234033494,0.458642624,0.17615102,0,0.090842904,0.682222746,0.22693435,MSS,No,WT,WT,1.483191823,1.313865823,0.389123498,5.882643049,5.614709844,3.459431619,CNV_high,20.83333333,8.333333333,52.08333333,6.25,4.166666667,8.333333333,Blood_normal,cec2f45d-c155-483f-97df-221ca574f6b9_gdc_realn.bam,7183ee6a-cb23-44bc-becd-c98704733392,Tumor,9a2dfd9e-6dd6-47a2-8e80-d48625d4e780_gdc_realn.bam,a8043bba-addf-4b7b-9349-ad547fc81f15,Blood_normal,0686705a-97fa-4c41-b9fa-15664a310849,Tumor,a09c225a-3e70-41ff-9a0e-0900c7be4f69,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_CCGTCC_S7_L005_R1_001.fastq.gz,2ed2fd9d-d69e-4924-bcac-555bfa24030e,Tumor,170818_UNC32-K00270_0050_AHL2FHBBXX_CCGTCC_S7_L005_R2_001.fastq.gz,6d9108cf-e7d8-4040-8e1d-3f707ee381c8,Tumor,e3285ecb-ca76-44c8-b642-0a9f2bdda849,YES,PASS
+S071,C3N-00339,No,2,6,131,C3N-00339-02,CPT0020530003,Tumor,Yes,Ukraine,NA,under 50 %,Serous,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,21.83,45,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,1.3,Positive,40,Positive,60,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – focally positive; Vimentin (Vim3B4) – focally positive;,Cannot be determined,Unknown,0.008953555,0.084386617,0.20409367,0.039508302,0.065979471,0.003261892,5.75E-08,0.593816436,0,0.003299952,0.019515459,0.008406152,0,0.151782648,0,0.018158207,0,0,0,0.026200573,0,0.052651407,0.023516134,0.088148927,0.001005253,0.032505502,0,0.095529589,8.00E-04,0.0284594,0.55,3770.318861,6024.521674,9794.840535,0.851157473,-0.81,0.538,-0.43,-0.36,0.54,-0.11,0.62,2.01,-1.35,0.45,-0.51,1.02,-1.33,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.293548454,0.045922162,CNV_HIGH,3.855353468,0,-0.000582126,-0.164517995,0.098145517,0.090120448,0.044860534,0.297638094,0.472159908,0.230201998,MSS,No,WT,WT,1.121990524,1.015548827,0.207641466,5.285402219,5.087462841,2.584962501,CNV_high,12.12121212,12.12121212,48.48484848,15.15151515,6.060606061,6.060606061,Blood_normal,f9f0e294-4ac5-4fca-b75c-bdaa29f51037_gdc_realn.bam,3f967a5c-a804-4979-9bff-ccf26929a35c,Tumor,8f03933b-8c46-46b6-9b33-ca5d62d9f61d_gdc_realn.bam,0c0a86f8-c867-41de-a46c-14d6df8e69bf,Blood_normal,508fe33e-3cf2-4659-be9f-669d5d54e09d,Tumor,163c572a-c9a2-4fd6-a4e4-2471c34b02d1,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_CAGATC_S9_L003_R1_001.fastq.gz,b583347d-5091-47aa-88f4-f13b6ccdbc9b,Tumor,170818_UNC32-K00270_0051_BHLCCHBBXX_CAGATC_S9_L003_R2_001.fastq.gz,fb5ba732-fdb9-4fb1-a189-ca27dc978cfd,Tumor,cc594bce-b895-47d0-914c-a3b1980b41a6,YES,PASS
+S072,C3N-00340,No,3,10,128C,C3N-00340-03,CPT0022620003,Tumor,Yes,Ukraine,NA,under 50 %,Serous,YES,Normal,pT3a (FIGO IIIA),pN1 (FIGO IIIC1),cM0,Staging Incomplete,Stage III,IIIC1,1,27,60,No,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,3.5,Negative,NA,Negative,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) –Positive; Vimentin (Vim3B4) – Negative,Cannot be determined,2,0.0067689,0.068433718,0.130744929,0.010548701,0.078487444,0.006156781,2.45E-09,0.698859525,0.010986849,0,0.010129747,0.119573197,0,0.135062253,0,0.052257959,0,0,0,0.095799524,0.006634182,0.083981574,0.065229078,0.180950468,0.017311522,0,0,0.073013728,4.00E-04,0.008671362,0.86,4660.078921,7879.766432,12539.84535,0.544317848,-1.62,-2.11,-0.18,-0.26,0.89,0.7,1.83,-0.16,0.6,1.69,1.53,-1.66,-0.91,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.409118908,0.259260859,CNV_HIGH,4.403322524,-0.047929426,-0.000856933,-0.060733953,0.045136978,0.376787468,0.000282338,0.281868662,0.403135805,0.314995532,MSS,No,WT,WT,1.277478143,1.240148892,0.086674411,5.554588852,5.491853096,1.584962501,CNV_high,22.72727273,27.27272727,31.81818182,4.545454545,9.090909091,4.545454545,Blood_normal,473ea35b-e97c-4d26-9321-84efd530f725_gdc_realn.bam,4f7981ac-194f-4927-ae66-97a7476372de,Tumor,bdc0043c-0eb0-4495-9620-7ede4bac168c_gdc_realn.bam,8c0129ea-4649-403e-9a53-2853c81782f0,Blood_normal,32560c79-2b82-4441-b1f2-fbd2838043ae,Tumor,bdd16566-8a24-4e7b-9c62-d5d9b84196eb,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GTCCGC_S23_L006_R1_001.fastq.gz,9c6560e4-984b-4de4-b784-06e2d358ee2d,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GTCCGC_S23_L006_R2_001.fastq.gz,b3de2a7e-ac96-42dd-bcf2-61f14df637c6,Tumor,61ad8a8e-4de3-438f-ae2f-c036d7b579a9,YES,PASS
+S073,C3N-00377,No,4,14,130N,C3N-00377-02,CPT0021650003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,34,64,Yes,NA,NA,Female,"Other, specify",Uterine cavity,Multifocal,1,Positive,90,Positive,92,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – Focal positivity; Vimentin (Vim3B4) – Positive,Cannot be determined,Unknown,0.004000451,0.018259117,0.068455696,0.026465241,0.050668927,0.001646215,4.65E-10,0.830504353,0,0,0.007937663,0,0,0.09073604,0,0.019611349,0,0,0.001813999,0.008723851,0,0.087639399,0.004394117,0.028277497,0.00618781,0.023606374,0,0.109393005,0.023100596,0.1185783,0.53,1368.770354,4260.000421,5628.770775,0.828513827,-0.108,-0.888,0.03,2.52,-1.47,1.96,1.04,-0.48,-1.79,1.71,-2.22,2.28,-1.34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.041772051,CNV_LOW,0.688823904,-0.000224265,-0.014711711,-0.01966535,0.430309377,0,0,0.168580861,0.763208662,0.068210478,MSI-H,No,WT,WT,4.103661326,3.667073084,2.456586466,9.033423002,8.566054038,7.189824559,MSI-H,13.22751323,3.968253968,65.07936508,12.43386243,3.439153439,1.851851852,Blood_normal,e41f9914-e7d0-41c2-bf1c-7cc9ef6c2a8c_gdc_realn.bam,1cecc8fa-07b8-4945-8bb9-2cd88ef2561f,Tumor,8886551e-e603-46cd-8f0e-f787821cbb00_gdc_realn.bam,3de00368-7da2-4f00-8396-3eb51f1b4b97,Blood_normal,d41328fd-e13e-41ad-affa-a32acce44ecc,Tumor,33b3fd40-ac00-4ae7-9643-e32d8fe38859,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_GTGGCC_S8_L003_R1_001.fastq.gz,927fd63f-62e5-491b-8c64-bdb58cb0881c,Tumor,171011_UNC31-K00269_0086_AHLJLCBBXX_GTGGCC_S8_L003_R2_001.fastq.gz,bc45d610-dace-4837-a41d-4a4ea3cb8b5c,Tumor,a9a39770-ecca-4f2a-b968-f23da3be8ce0,YES,PASS
+S074,C3N-00379,No,3,9,129N,C3N-00379-01,CPT0015520003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,36.81,41,No,NA,NA,Female,"Other, specify",Entire uterine cavity.,Multifocal,2.5,Positive,60,Positive,80,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - positive;,Cannot be determined,1,0.003503154,0.245873229,0.08983858,0.031302276,0.140659271,0.003490464,1.45E-08,0.485333012,9.00E-04,0,0.004338932,0.071564429,0,0.218420192,0,0.00995094,0.017933064,0,0.026913005,0.02064572,0.026199623,0.01902837,0.023567338,0.151485066,0.086513328,0.0015387,0.013040807,0,0.007938323,0,0.7,4548.865546,5698.605048,10247.47059,0.3289826,-0.055,0.863,0.2,-0.99,-0.54,-0.18,-0.21,0.51,0.62,-0.06,-0.01,-1.06,0.69,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.044645537,CNV_LOW,0.668276245,-0.013677414,-0.013714462,-0.010118216,0,0,0,0.090616832,0.305268446,0.604114722,MSS,No,WT,WT,1.037474705,0.993284573,0.086674411,5.129283017,5.044394119,1.584962501,CNV_low,28.125,25,43.75,0,0,3.125,Blood_normal,0041944a-53b2-47b2-8a4e-d50b00467f10_gdc_realn.bam,7828f203-3720-43d3-be43-c834edb7b683,Tumor,fcb00b68-150a-4231-9f7b-32cb967f8c23_gdc_realn.bam,4b47bcc5-e26c-4f9b-944e-d0e1f06ed67c,Blood_normal,546f1c4f-8040-4463-8bdf-e6b7db95345b,Tumor,3d01fbb1-f7ce-45be-960f-d4d4fcd68d73,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_ATTCCT_S1_L001_R1_001.fastq.gz,8b3b442d-0e88-4e6c-84b0-b4eaae2be844,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_ATTCCT_S1_L001_R2_001.fastq.gz,9c1db68b-16f9-42cd-b818-cd95a5236e11,Tumor,330c746b-4d8c-4a8b-a33f-a53d519e6091,YES,PASS
+S075,C3N-00383,No,1,2,129N,C3N-00383-04,CPT0022830003,Tumor,Yes,Ukraine,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,46,61,No,NA,NA,Female,"Other, specify",Uterine cavity,Unifocal,4,Positive,70,Positive,50,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) – Positive; Vimentin (Vim3B4) – Focal positivity,Cannot be determined,1,0.005384921,0.094470153,0.097992123,0.055027196,0.132889331,0.001412001,7.50E-09,0.612824268,0,0.008617884,0.015359593,0.039137254,0,0.096250531,0,0.0026438,0,0,0,0.011322581,0.014863803,0,0,0.083600371,0.005855964,0.010619227,0.026044313,0,0.025684679,0,0.34,3237.700172,3325.590092,6563.290264,0.514479137,0.515,0.054,0.58,-0.62,-1.16,0.92,-1.74,0.66,-0.11,-1.65,0.3,-0.4,1.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,2.31E-06,0.018125898,CNV_LOW,0.095221818,-0.001757162,0,0,0,0.004470598,0.005030412,0.054325363,0.360835808,0.584838829,MSS,No,WT,WT,1.277478143,1.258934249,0.043988012,5.554588852,5.523561956,1,CNV_low,22.22222222,4.444444444,55.55555556,6.666666667,2.222222222,8.888888889,Blood_normal,37e6dbaf-8495-467a-97d2-931a4061ec47_gdc_realn.bam,c56e7f63-aeaf-4c1e-8f3c-f17d6308709f,Tumor,6e9f2a37-b53b-4017-ae76-ac141191ea9c_gdc_realn.bam,f47e869d-e5af-4f78-a38e-af3bec29cabd,Blood_normal,ce679c8e-fc60-4adb-87c1-f1d569949992,Tumor,b48d0c57-1858-4982-9a19-2cd0475d7c36,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GATCAG_S8_L002_R1_001.fastq.gz,d8669320-5d71-4398-b65c-26c820c08fbf,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_GATCAG_S8_L002_R2_001.fastq.gz,d98e6375-44c4-45d8-a771-293f086bc52b,Tumor,0cad6daf-f36f-43ab-90f3-f5a083fec4e9,YES,PASS
+S076,C3N-00386,No,3,9,130C,C3N-00386-02,CPT0016220003,Tumor,Yes,Ukraine,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,27.31,44,No,NA,NA,Female,"Other, specify",Entire uterine cavity.,Multifocal,2.3,Positive,80,Positive,70,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - focal positivity; Vimentin - focal positivity;,Cannot be determined,2,0.003464145,0.124031449,0.106148283,0.053395146,0.075680425,0.002052767,4.21E-09,0.635227781,0.011563794,0,0.002019326,0.077080519,0,0.21779436,0,0.075026311,0,0,0,0.017937373,0,0.017185208,0.019426868,0.129435366,0.025064852,0.016131815,0.041334208,0,0,0,0.65,3739.873076,5415.732444,9155.605519,0.340749746,0.184,0.288,-0.37,-1.43,0.68,-0.58,-0.63,-0.24,0.74,-0.87,-0.11,-0.48,1.01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0.089007713,CNV_LOW,1.21580133,-0.031026014,-0.03162148,-0.030961733,0,0,0,0.118997554,0.433965799,0.447036647,MSS,No,WT,WT,0.775745316,0.722601368,0.086674411,4.584962501,4.459431619,1.584962501,CNV_low,23.80952381,19.04761905,57.14285714,0,0,0,Blood_normal,d2d78e06-7a68-485f-a67e-256986fb3006_gdc_realn.bam,defc4d49-3f61-4186-8bba-95942de36e01,Tumor,86ea43f0-a9c9-47de-959c-d96c72272713_gdc_realn.bam,d7265bb5-d80b-4b5b-a2b6-2faef6b63167,Blood_normal,061f7b00-6f79-4554-b725-5778501dbccb,Tumor,160b8957-db23-4cd1-8528-c465bb3739af,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_ATCACG_S4_L001_R1_001.fastq.gz,68aa44a5-d2a6-4ba3-ba6c-0650d6f945f4,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_ATCACG_S4_L001_R2_001.fastq.gz,c5af6b28-5fc3-4a17-bb93-bceb731d1f80,Tumor,4a81704b-9e9a-495b-bfe5-4f6ea24ec877,YES,PASS
+S077,C3N-00388,No,4,13,129N,C3N-00388-01,CPT0016940003,Tumor,Yes,Ukraine,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,20.55,59,No,NA,NA,Female,"Other, specify",Entire uterine cavity.,Multifocal,4,Positive,85,Positive,60,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 (OV-TL 12/30) - Positive; Vimentin (Vim3B4) - Negative;,Cannot be determined,1,0.004485592,0.052902816,0.105776981,0.032607874,0.061969134,0.002609858,3.06E-09,0.739647742,0,0,0.030300705,0.021893216,0,0.164865609,0,0.027823669,0.0094154,0,0.030943857,0.002314538,0.011249512,0.058283669,0.024603135,0.112311231,0,0.004934959,0,0.039742974,0,0.011317526,0.55,2390.647801,5071.823771,7462.471572,0.855693124,-0.167,-1.76,-0.08,1.32,-0.89,0.06,0.46,0.72,-0.61,1.09,-0.35,1.01,-0.62,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,9.62E-05,0.000609382,CNV_LOW,0.099159025,0,0,-0.001173457,0.004475428,0.006028132,0,NA,NA,NA,MSI-H,No,WT,WT,3.839223293,3.426576341,2.18915136,8.751544059,8.303780748,6.857980995,MSI-H,15.87301587,1.904761905,55.55555556,19.36507937,2.222222222,5.079365079,Blood_normal,24dbe8dc-e176-464f-94a0-006aa43bee59_gdc_realn.bam,3904884a-aa68-428b-8ed5-76e52c511f38,Tumor,2949e6ba-cb37-40b5-a7cf-09978da51025_gdc_realn.bam,168de14c-fd98-4baa-a673-d5ab0db9ad03,Blood_normal,ab9ad1e1-c9a1-44a9-b7af-d741d562a6c5,Tumor,9ef2c309-7943-42d6-9e53-6e0e7186e05a,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_AGTTCC_S19_L005_R1_001.fastq.gz,c6980525-d030-4169-9393-0f1f73fe04a8,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_AGTTCC_S19_L005_R2_001.fastq.gz,a989c255-f970-475b-bd15-7b7103241d2a,Tumor,22b0937f-161f-4c33-85d9-00c9c74241e3,NA,NA
+S078,C3N-00389,No,1,3,131,C3N-00389-04,CPT0026270003,Tumor,Yes,Ukraine,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,cM0,Staging Incomplete,Stage I,IB,0,17.85,62,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Multifocal,1.5,Positive,75,Positive,70,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - focal positivity;,Cannot be determined,1,0.012965305,0.07740866,0.127668486,0.038402581,0.065170953,0.005576804,1.44E-09,0.67280721,0,0.014679488,0.070557933,0.149854695,0,0.124816275,0.053681598,0.108819909,0.01092673,0,0,0.014971624,0,0.157369678,0.0433648,0.219421538,0.011178091,0,0.010357642,0,0,0,0.99,3323.981653,6780.561618,10104.54327,0.570927382,-0.0707,-0.604,-0.34,-1.4,-0.81,0.33,0.12,-0.6,-0.17,-0.66,0.99,0.57,-0.89,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0.018033776,0.000785674,CNV_LOW,0.034407469,-0.001862026,0,-0.000253125,0,0.002272731,0.006610292,0.233208597,0.526925007,0.239866397,MSI-H,No,MS_indel,Frame_Shift_Del,4.44863097,3.932859217,2.919047794,9.396604781,8.851749041,7.73470962,MSI-H,13.23210412,3.036876356,59.86984816,20.60737527,1.301518438,1.952277657,Blood_normal,7306b3df-21f1-45bd-8905-0d6be9035f4d_gdc_realn.bam,7f94f3b7-bae8-4766-89d3-eb0361fd142c,Tumor,5dff9447-7da8-4573-bb38-003f96c1efa7_gdc_realn.bam,a84d1048-c1c0-418a-ac9b-efe0b9dcafbe,Blood_normal,ac7408b2-96a1-4736-8e97-f5ae263c3b21,Tumor,be158832-b452-457b-8964-8ed06a55400f,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_ATCACG_S15_L007_R1_001.fastq.gz,dd9cffde-1af6-49c4-b230-6b68529c0cab,Tumor,170828_UNC31-K00269_0077_AHLF33BBXX_ATCACG_S15_L007_R2_001.fastq.gz,0a029fc1-cddc-482f-8d17-f1aee87e877f,Tumor,0d9213fe-df70-4f56-8ceb-fef30e5f5fa4,YES,PASS
+S079,C3N-00729,No,2,6,128C,"C3N-00729-02,C3N-00729-03",CPT0064930004,Tumor,No,United States,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN0,Staging Incomplete,Staging Incomplete,Stage II,IB,0,29.62,86,No,White,Not-Hispanic or Latino,Female,"Other, specify",both anterior and posterior,Multifocal,4,Positive,-1,Positive,-1,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,see comment below,Cannot be determined,4 or more,0.006626794,0.072052749,0.099153436,0.013685766,0.108277744,0.008524373,3.78E-09,0.691679134,0.00168362,0.019373833,0.033097091,0.020138707,0,0.081380422,0,0.020752246,0.006463702,0,0,0.019284594,0,0.304649896,0.007387838,0.124292888,0,0,0,0.061907521,0,0.059587641,0.76,4279.438113,6798.100266,11077.53838,0.691178947,-0.329,-1.1,0.85,0.79,-0.62,0.57,1.54,0.31,-0.53,1.96,0.73,-0.01,0.09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.024296112,0.10927099,CNV_LOW,1.268069804,-0.000303572,-0.013557086,-0.034077111,0.266096258,0.136186586,0.005374178,0.253958448,0.556850798,0.189190754,MSS,No,WT,WT,1.732044006,1.59090564,0.389123498,6.247927513,6.044394119,3.459431619,CNV_low,13.84615385,7.692307692,56.92307692,15.38461538,3.076923077,3.076923077,Blood_normal,4c0795d8-5700-45d5-ac34-de5f2660a99d_gdc_realn.bam,2706ff8e-1101-4969-a2e3-d2bdfe201e66,Tumor,2fbd49f6-4c1b-4bb8-a92c-27ac702d593f_gdc_realn.bam,e35e206a-6a63-428d-9ee1-634c076e34f4,Blood_normal,bc11cdc1-7b13-4d28-b9c4-6ad617ca75f7,Tumor,e3322e24-bb2a-494d-b11f-619c992a284d,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CCGTCC_S20_L006_R1_001.fastq.gz,2c0420b6-8299-46ed-8971-deab6b23eabf,Tumor,170823_UNC31-K00269_0075_AHLCW5BBXX_CCGTCC_S20_L006_R2_001.fastq.gz,f783684d-96e1-46be-a19e-686d53adf650,Tumor,205dc43a-bbe9-496e-8d14-cc23041feaf7,YES,PASS
+S080,C3N-00734,No,1,1,130N,"C3N-00734-01,C3N-00734-03",CPT0026030004,Tumor,No,United States,FIGO grade 1,50 % or more,Endometrioid,YES,Normal,pT3a (FIGO IIIA),pN0,Staging Incomplete,Staging Incomplete,Stage III,IIIA,1,38.97,53,No,White,Not-Hispanic or Latino,Female,Anterior endometrium,NA,Unifocal,4,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,None,0.005026187,0.061861318,0.105775791,0.027766592,0.131855933,0.003653403,4.41E-09,0.66406077,0,0.006524058,0,0.016723849,0,0.136729657,0,0.014417241,0.001303502,0,0,0.069134349,0,0.011028416,0,0.097929766,0.009838939,0.015000539,0,0.128791658,0,0.072578025,0.58,2750.595162,5784.902177,8535.497339,0.566569828,-0.0713,-0.0363,-0.06,1.46,-0.68,-0.43,0.64,-0.1,-0.88,0.91,0.42,1.54,0.38,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,8.99E-06,0,CNV_LOW,0.033533736,0,0,-0.001532753,0.000595152,0.000266397,0,0.254109924,0.541519478,0.204370598,MSS,No,WT,WT,0.924352909,0.695276782,0.354607617,4.906890596,4.392317423,3.321928095,CNV_low,15,10,55,10,5,5,Blood_normal,d17afc43-9d34-4784-87aa-12c19595d871_gdc_realn.bam,c9c4d8b9-6c91-46e1-b6f8-d759c7f02165,Tumor,0750565e-93eb-43b6-87d3-8f84199fc1a0_gdc_realn.bam,b3fb76a3-c463-4fc1-b12c-88dbe5285071,Blood_normal,8a8cced8-56c3-41d5-aaab-2cb28481e9c1,Tumor,2a812ded-32cc-466b-9ede-be47f74903f7,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_ACAGTG_S7_L002_R1_001.fastq.gz,9faa7052-ed20-4706-97f2-c3f183a4a0bd,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_ACAGTG_S7_L002_R2_001.fastq.gz,4518b21b-b9a3-4261-b0e1-27b9f7d0e282,Tumor,0cbb0afc-eab3-4369-ba73-83ad9befb0a9,YES,PASS
+S081,C3N-00743,No,3,12,128N,C3N-00743-01,CPT0062940003,Tumor,No,United States,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,48.46,53,No,Black or African American,Not-Hispanic or Latino,Female,"Other, specify","Involves fundus, anterior and posterior walls",Multifocal,3.5,Unknown,NA,Unknown,NA,Unknown,Unknown,Unknown,Unknown,Unknown,NA,Cannot be determined,2,0.004039392,0.168628647,0.078980272,0.035878962,0.149448996,0.000433903,6.88E-09,0.56258982,0.010998418,0,0.038080912,0.046626728,0,0.160050801,0,0.024640361,0.013355372,0,0.033115855,0.009230714,0.020732476,0.013521662,0,0.03309009,0,0.015563506,0.020993106,0,0,0,0.44,2616.128221,2870.606208,5486.734429,0.564729059,-0.378,-0.105,0.3,-0.25,-1.97,0.14,-1.99,-0.31,0.54,-1.66,-1.7,0.01,-0.42,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.005207401,0.017915723,CNV_LOW,0.585354783,-0.002837097,-0.004178351,-0.007956516,0.010595091,0.011390353,0.011912057,0.0632497,0.578781362,0.357968938,MSI-H,No,WT,WT,3.409863787,2.954052221,1.955780345,8.285402219,7.77478706,6.554588852,MSI-H,16.05504587,2.293577982,55.50458716,19.26605505,2.752293578,4.128440367,Blood_normal,5b71b4cf-7f14-42ec-bbe6-58842e3efd3f_gdc_realn.bam,7b479f2e-cccc-475d-84c6-d54ad43a5966,Tumor,fb83df64-bb73-499c-a57a-9436e26aa093_gdc_realn.bam,614a8780-9f61-476c-8d81-dbd4f18c0220,Blood_normal,d062478a-4524-4ba8-b2d6-f50395fcc112,Tumor,7a302498-b5e1-479b-898f-fd7bd524db94,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_CTTGTA_S13_L004_R1_001.fastq.gz,a7609db0-2819-4dc0-9988-4a1323d15135,Tumor,170823_UNC31-K00269_0076_BHLCTFBBXX_CTTGTA_S13_L004_R2_001.fastq.gz,b10f3be4-53ec-4526-96d5-050e3a0eebe7,Tumor,f309b870-6a47-4af2-b619-6678eb2f07e0,YES,PASS
+S082,C3N-00836,No,2,6,130C,C3N-00836-02,CPT0023810003,Tumor,Yes,Ukraine,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM1,Staging Incomplete,Stage I,IA,0,30.47,75,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Multifocal,1,Positive,40,Negative,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - negative;,Cannot be determined,3,0.018025928,0.039861707,0.163024583,0.045724267,0.114536976,0.011478385,2.09E-09,0.607348151,0.04942716,0.016317124,0.031350984,0.63289952,0,0.003827364,0.503309554,0.159976185,0,0,0.056594181,0.054696075,0,0.175928867,0.306262844,0.443728427,0,0,0,0.034636055,0,0.031045659,2.5,5319.12579,9583.565561,14902.69135,0.680627371,-0.914,-1.34,0.37,-0.6,1.79,0.78,2.03,0.16,-0.28,1.14,1.57,-1.1,-1.42,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.30040734,0.174486818,CNV_HIGH,1.578135012,-0.003798404,-0.000574442,-0.079583915,0.043401406,0.100474431,0,0.413790787,0.24094175,0.345267463,MSS,No,WT,WT,1.450879273,1.383998466,0.168435383,5.832890014,5.727920455,2.321928095,CNV_high,21.15384615,23.07692308,34.61538462,1.923076923,9.615384615,9.615384615,Blood_normal,fa4f35a1-cb0a-4d92-8631-4f675161b355_gdc_realn.bam,fc4f4a44-123f-4d29-82e8-c155ed19910f,Tumor,08fed8ee-199e-4b11-bcee-2b3b2b320ae9_gdc_realn.bam,b3f4557b-7fb4-41e5-b5b9-44c725451295,Blood_normal,09220dab-d4b0-4d15-bb22-a222b5d0abc1,Tumor,3b118458-2f1c-42f4-9ae0-17485da8782a,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_TTAGGC_S6_L008_R1_001.fastq.gz,f19c6938-942c-47bd-b72d-79d62e8fbeb8,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_TTAGGC_S6_L008_R2_001.fastq.gz,5c51da7e-eead-4fd1-8343-c4d4ddc19458,Tumor,b2d2a0d0-15ce-4652-b46a-1ad770e5e8b2,YES,PASS
+S083,C3N-00847,No,4,16,130C,C3N-00847-01,CPT0027650003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT3a (FIGO IIIA),pN0,cM0,Staging Incomplete,Stage III,IIIA,0,34.53,65,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Multifocal,4.3,Positive,95,Positive,80,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - positive;,Cannot be determined,2,0.005454982,0.040036596,0.091207189,0.053361059,0.06545845,0.000828178,2.07E-09,0.743653544,0,0,0.010732175,0.053007557,0,0.081197741,0,0,0.056220642,0,0.008729727,0,0,0.170651046,0.007473355,0,0,0.007590874,0.032382459,0,0,0.002014424,0.43,1325.965485,2157.90896,3483.874446,0.628709017,0.624,-0.61,-0.7,0.78,-1.8,-1.02,-2.51,0.8,0.21,-2.21,-2.31,-0.3,-0.65,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018864037,0.000178777,CNV_LOW,0.372169582,-0.007844061,-0.008377274,-0.031655005,0,0,0,0.002630944,0.717091014,0.280278042,MSS,No,WT,WT,1.545723606,1.434447489,0.282995148,5.977279923,5.807354922,3,CNV_low,12.72727273,7.272727273,72.72727273,1.818181818,3.636363636,1.818181818,Blood_normal,9333c264-b25a-4ad9-8cbc-60d4d961a1e4_gdc_realn.bam,2c32b668-5200-49de-843a-bb14b3707a90,Tumor,10bc413b-8a36-4190-9e47-48750c262fa0_gdc_realn.bam,f418b6a5-909d-4f06-b3b2-da95f81bc611,Blood_normal,fbdad251-d8da-4651-9e23-ab9aa7ebdfca,Tumor,9d6fba33-07d9-4f69-80af-44adc48f100a,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_CGATGT_S6_L002_R1_001.fastq.gz,a3c73a24-ad46-4690-a782-0c9de7554f6b,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_CGATGT_S6_L002_R2_001.fastq.gz,33715f26-60b2-4c5d-994c-00faa767e9be,Tumor,6b8152cb-2fcc-49ac-8e30-562bbe846050,YES,PASS
+S084,C3N-00848,No,4,15,130C,C3N-00848-02,CPT0024130003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,33.65,66,Yes,NA,NA,Female,"Other, specify",Entire uterine cavity,Multifocal,3,Positive,40,Positive,70,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK - positive; Vimentin - positive;,Cannot be determined,None,0.006741019,0.046914004,0.132741322,0.04025961,0.059487435,0.00231511,1.58E-09,0.711541498,0,0.009631275,0.078532432,0.061654733,0,0.141768576,0,0.030498309,0.0262358,0,0,0.055392899,0.009388966,0.037858222,0.011451255,0.099516218,0,0.009602334,0,0,0,0.028468981,0.6,2236.194057,4772.594927,7008.788984,0.623396118,0.628,0.41,0.39,-0.03,0.08,0.53,-0.36,-0.61,-0.53,-0.28,-0.24,0.58,0.34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.009800931,0.195425536,CNV_LOW,2.887210768,-0.016228174,0,-0.116987117,0.215849759,0.030893769,0.000444979,0.160723734,0.594726783,0.244549483,MSS,No,WT,WT,1.691133088,1.605657252,0.245810227,6.189824559,6.06608919,2.807354922,CNV_low,16.66666667,7.575757576,60.60606061,7.575757576,1.515151515,6.060606061,Blood_normal,1d98bc15-ba29-4594-bc9d-504e70515a19_gdc_realn.bam,e562cec2-f26f-4706-9927-f13ea930d5fa,Tumor,75c83651-16d9-4099-ac1c-9ac164140aac_gdc_realn.bam,ba0c4492-c816-44ed-9ca0-6c610375514f,Blood_normal,fa7446b0-a818-4096-941d-746e73d363fa,Tumor,3a1b5ca0-555b-41e1-bd66-86c2796d69ca,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_GTGGCC_S3_L001_R1_001.fastq.gz,6e88806d-df9b-40b0-9ab9-d5f5265ef294,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_GTGGCC_S3_L001_R2_001.fastq.gz,b8f22b8f-204c-48b4-8147-5c9caf3c2f76,Tumor,087bea19-c79b-4b8a-a6fe-0f4b33a79257,YES,PASS
+S085,C3N-00850,No,1,1,131,C3N-00850-02,CPT0027810003,Tumor,Yes,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,28.84,65,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Multifocal,1,Positive,86,Positive,95,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - positive;,Cannot be determined,1,0.005502748,0.082280026,0.105293495,0.041651211,0.079296536,0.004117287,1.08E-09,0.681858696,0,0,0.055544593,0.191800595,0,0.098042625,0,0.05726203,0.028484946,0,0,0.048861303,0.002629565,0.095029079,0.066989651,0.174223788,2.00E-04,0,0.020947178,0,0,0,0.84,3142.491368,5804.812736,8947.304105,0.562402302,0.983,1.5,-0.33,0.29,0.07,-0.41,-0.48,1.21,0.03,-0.69,-0.23,-0.76,-0.35,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,3.67E-06,0.042450766,CNV_LOW,1.144517018,0,-0.031027797,-0.031470962,0.62144095,0,0,0.199052848,0.564237797,0.236709354,MSI-H,No,MS_indel,Frame_Shift_Del,3.981739572,3.572626906,2.293055185,8.903881846,8.463524373,6.988684687,MSI-H,19.31818182,1.136363636,48.01136364,18.75,7.386363636,5.397727273,Blood_normal,9872eab8-63d3-4882-877b-7b4de75bba37_gdc_realn.bam,513b27f2-8ec9-45d2-bcd4-f733bc617f3b,Tumor,850bd0cf-dd68-40b6-a6b6-83e42e76fd32_gdc_realn.bam,35036101-2e5d-4d47-bd7c-3eb638bcf6a2,Blood_normal,ac12d7d2-d6d3-454b-aa22-827c6b7a49a9,Tumor,76e8c02d-65d4-4ff0-bad5-5b0393d401f9,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_AGTTCC_S19_L005_R1_001.fastq.gz,232956d9-bea5-47c0-8fcb-bfbbb8bab7e5,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_AGTTCC_S19_L005_R2_001.fastq.gz,23941127-4227-4b4a-82ea-f76fe7062763,Tumor,aa58a384-12a8-4f6b-955e-64c110cd8a6e,YES,PASS
+S086,C3N-00858,No,1,1,127N,"C3N-00858-01,C3N-00858-04",CPT0078320004,Tumor,No,Poland,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT3b (FIGO IIIB),pN0,cM0,No pathologic evidence of distant metastasis,Stage III,IIIB,1,36,65,No,NA,NA,Female,Anterior endometrium,NA,Multifocal,11,Positive,50,Positive,50,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Normal,NA,Cannot be determined,2,0.008642722,0.014995028,0.127639879,0.040554232,0.07255053,0.002238438,1.53E-10,0.733379171,0,0.013644085,0.039926845,0.055865002,0,0.058394539,0,0.010287635,0.005921685,0.004284004,0,0,0,0.099516789,0.009653509,0.094545911,0,0,0.047959996,0,0,0,0.44,1989.365377,5177.586543,7166.95192,0.813240954,-0.00667,-0.179,-0.87,0.16,-0.56,-2.47,-0.97,1.47,-1.24,-0.75,0.67,0.16,-0.69,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,4.98E-05,0.194728454,CNV_LOW,3.281137801,-0.095116164,-0.117524875,-0.116374872,0.001100237,0.000255572,0.000672214,0.121613963,0.763673461,0.114712575,MSS,No,WT,WT,1.872844004,1.691133088,0.519478029,6.442943496,6.189824559,3.906890596,CNV_low,13.88888889,6.944444444,63.88888889,6.944444444,6.944444444,1.388888889,Blood_normal,bdc38930-0dd6-4f39-a41d-56574c4b93ea_gdc_realn.bam,310f8751-c639-4b2e-86a2-5599a7e37d9d,Tumor,b93f9712-3ac1-4bf5-b4c0-a3f584dc872a_gdc_realn.bam,abf58596-7554-4be7-bd99-2b0e02a40d41,Blood_normal,c00380d4-0e17-4a1f-afdf-f94329078eaf,Tumor,4d80e3cb-84e1-405a-a6e0-3e22eef87513,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_AGTCAA_S18_L006_R1_001.fastq.gz,1a17a35c-466d-4a23-aee2-04b09c784e2a,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_AGTCAA_S18_L006_R2_001.fastq.gz,1a561b74-0af0-4d52-9288-9fa08c5d2729,Tumor,5c87b71a-c700-4731-9440-2534231a2e41,YES,PASS
+S087,C3N-00866,No,2,8,127N,C3N-00866-01,CPT0063090003,Tumor,No,United States,FIGO grade 1,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,23.88,77,Yes,White,Not-Hispanic or Latino,Female,"Other, specify","Anterior, posterior and fundus",Multifocal,3,Unknown,NA,Unknown,NA,Unknown,Unknown,Unknown,Unknown,Unknown,NA,Cannot be determined,4 or more,0.006343461,0.020465846,0.105062707,0.043460088,0.074407637,0.003497833,4.41E-10,0.746762428,0,0.017101616,0,0.163010063,0,0.141971235,0,0.016325308,0.020834183,0,0.034585703,0.028120711,0.029366632,0,0,0.254847613,0.001366269,0.017862528,0.01460814,0,0,0,0.74,1380.859986,4333.723559,5714.583545,0.521483431,0.853,-0.197,-1.03,-1.09,-0.48,-1.63,-0.83,-0.16,-0.75,-0.57,0.08,1.68,1.3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018712496,0.048341817,CNV_LOW,0.556900161,-0.013058781,-0.012890698,-0.017694369,0,0,0,0.097312578,0.639515552,0.26317187,MSS,No,WT,WT,1.704899366,1.620259553,0.245810227,6.209453366,6.087462841,2.807354922,CNV_low,13.43283582,7.462686567,58.20895522,11.94029851,5.970149254,2.985074627,Blood_normal,5a955d91-4992-4986-a78b-a2f80f455ea1_gdc_realn.bam,ee7f6e8b-6389-4391-9e6a-5cdc220bc39b,Tumor,6c9f01df-2bc2-405e-b94c-a673cde4f948_gdc_realn.bam,ad5ee7c0-c8bd-4cab-b438-7af8fa0c9946,Blood_normal,3cc9b549-bbac-48cc-8a82-5486203bf83d,Tumor,c825581d-0f15-4602-9895-fc171e032585,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_TTAGGC_S8_L002_R1_001.fastq.gz,99455b1b-f110-46ef-a03e-be651c28a0bd,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_TTAGGC_S8_L002_R2_001.fastq.gz,8323759d-cdc4-481c-8518-f170b5a87082,Tumor,bc1bf3bb-05d7-4eb0-8af0-8da11436dc69,YES,PASS
+S088,C3N-00880,No,3,12,130N,"C3N-00880-01,C3N-00880-03,C3N-00880-04",CPT0078710004,Tumor,No,Poland,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pN0,cM0,No pathologic evidence of distant metastasis,Stage I,IA,0,27,61,No,NA,NA,Female,Anterior endometrium,NA,Unifocal,3.2,Positive,50,Positive,50,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Normal,NA,Cannot be determined,1,0.006020275,0.03431866,0.112556539,0.049057793,0.044698224,0.004422598,5.54E-09,0.748925904,0,0,0.012116224,0.03888996,0,0.080097796,0,0.011367262,0.019249438,0,0,0.034249071,0.002181305,0.046756746,0.039194685,0.184924122,0.017629382,0,0,0.006719297,6.00E-04,0.005992563,0.5,2258.605402,5297.723765,7556.329167,0.621475455,0.638,0.784,-0.46,1.38,0.82,-1.66,-0.34,-1.57,-1.28,-0.41,0.73,0.81,1.41,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018027094,0.048335778,CNV_LOW,0.923181545,-0.0229681,-0.02685752,-0.028388489,0,0,0,0.097162309,0.732064691,0.170773,MSI-H,No,WT,WT,4.648850071,4.15721395,3.043446359,9.605479518,9.09011242,7.876516947,MSI-H,17.83088235,3.125,59.00735294,15.99264706,1.654411765,2.389705882,Blood_normal,d564a1a0-e99a-4998-8fa1-df8128379f65_gdc_realn.bam,1a8134da-8b3b-4ef2-bef6-a09a7bba5847,Tumor,90388559-98b6-4014-b201-f64e4965649b_gdc_realn.bam,dbc9b510-b274-4590-b23a-606d5ae71d48,Blood_normal,a3cc4371-a3ba-4bf9-83a8-944c42d63a5d,Tumor,46d5c8ad-33ef-4601-afbd-9b24761939c4,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTCCGC_S5_L002_R1_001.fastq.gz,f0952653-8c23-4781-a477-c2ce76f9d1b6,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTCCGC_S5_L002_R2_001.fastq.gz,6a2bd4f3-68fc-48f2-a604-db2c5a7b665b,Tumor,a32f2c2b-53c9-4e3a-92c4-21255e14de9b,YES,PASS
+S089,C3N-01001,Yes,1,2,130N,C3N-01001-03,CPT0078880003,Tumor,No,Poland,FIGO grade 2,under 50 %,Endometrioid,NO,Normal,pT2 (FIGO II),pN0,cM0,No pathologic evidence of distant metastasis,Stage II,II,0,44,72,Yes,NA,NA,Female,Posterior endometrium,NA,Unifocal,9.5,Positive,50,Positive,50,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Normal,"p169 negative, CEA positive/negative, vimentin positive, C5/6 positive",Cannot be determined,2,0.004084696,0.061835184,0.092073711,0.031092541,0.059780844,0.002658682,3.50E-07,0.748473991,0.016230354,0,5.00E-04,0.022281101,0,0.08361554,0,0.048851971,0.003613455,0,0.025937953,5.00E-04,0.014752748,0.062713266,0.010785251,0.016360624,0.026805078,0,0,0.071420892,0.020967199,0.014642587,0.44,2629.193014,4787.338006,7416.53102,0.722053168,-0.241,-0.89,2.25,1.05,-1.75,2.19,0.72,-0.37,-0.79,1.06,-0.43,0.87,-0.15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,NA,NA,NA,NA,-0.019598685,-0.012099912,-0.010995352,0,0,0,0.114951044,0.591579306,0.29346965,MSI-H,No,MS_indel,Frame_Shift_Del,3.848555117,3.397201081,2.283912601,8.761551232,8.271463028,6.977279923,MSI-H,12.66233766,2.272727273,66.55844156,14.61038961,2.922077922,0.974025974,Blood_normal,d02e9e05-81d1-4fa8-a227-ca3cec105ba9_gdc_realn.bam,925e35ea-964c-467c-af2a-a7784b0678c5,Tumor,9b7b8a64-7bb9-4c81-ac55-726b9577429a_gdc_realn.bam,8ef06338-11c6-441b-8c8c-f3c546442f8a,Blood_normal,01c15e02-acbc-49f1-b489-5e171548b0e8,Tumor,7d46687a-97ce-4fe4-adb0-3da1045e32d4,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTTTCG_S7_L002_R1_001.fastq.gz,f8c5ac84-6343-4a3c-9dcb-a04b0e23bb62,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTTTCG_S7_L002_R2_001.fastq.gz,e612b542-1d0e-45ee-84b8-60d39b004a6c,Tumor,dac6ce09-1698-4b72-af83-7a89bc209f04,YES,PASS
+S090,C3N-01003,No,3,9,130N,C3N-01003-01,CPT0078610003,Tumor,No,Poland,FIGO grade 1,50 % or more,Endometrioid,YES,Normal,pT3b (FIGO IIIB),pN0,cM1,pM1,Stage IV,IVB,0,31,73,Yes,NA,NA,Female,Anterior endometrium,NA,Multifocal,3,Positive,50,Positive,50,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,NA,Cannot be determined,4 or more,0.003476539,0.388840022,0.065617035,0.038687944,0.171853539,0.003535598,1.10E-07,0.327989213,0.021980571,0,0.029239852,0.039094717,0,0.125676207,0,0.025906813,0,0.004320911,0,0.038248608,0.012636839,0,0.003210075,0.157008808,0.002885581,0.01803284,0.071758177,0,0,0,0.55,4781.4706,4986.748413,9768.219013,0.259391825,1.39,0.837,-0.84,-1.01,-0.21,-0.37,-1.28,-0.73,0.84,-1.31,-0.35,-1.29,1.22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.200347839,0.805695672,CNV_HIGH,0.104003783,-0.001486725,0,-0.01415543,0,0.001173413,0,0.030460134,0.314292946,0.65524692,MSS,No,WT,WT,1.383998466,1.162461482,0.455772534,5.727920455,5.357552005,3.700439718,CNV_high,10,7.5,62.5,12.5,2.5,5,Blood_normal,51eb5a05-ad42-43dc-90a4-3ff7bb659a08_gdc_realn.bam,a434ccce-59e9-494a-9621-9cc732068f76,Tumor,b3f94939-b025-462f-8fff-ecbcd2a7d970_gdc_realn.bam,6e7973fa-f4a2-4c62-993e-444a64f54479,Blood_normal,f7ee25ab-e8b9-411d-8a85-30a3e162ce4b,Tumor,c3c5f8e3-f323-4657-baab-5a0af7cbc471,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_ATGTCA_S14_L006_R1_001.fastq.gz,4313d974-189f-4e5e-93c5-87adb840c560,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_ATGTCA_S14_L006_R2_001.fastq.gz,a613938d-aa43-4c67-ba0c-78c1c6c69a7b,Tumor,be14af37-382a-4a7a-a4c5-b26eabe64353,YES,PASS
+S091,C3N-01211,No,3,10,127N,C3N-01211-01,CPT0075900003,Tumor,No,Ukraine,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT2 (FIGO II),pN0,cM0,Staging Incomplete,Stage II,II,1,39.2,59,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Multifocal,1.4,Positive,95,Positive,90,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - patchy positivity; Vimentin - positive;,Cannot be determined,1,0.002510216,0.094950229,0.072331793,0.025750294,0.085395952,0.002150148,6.72E-10,0.716911368,0.00875642,0,0.00339234,0.03011827,0,0.141429748,0,0.053233302,0,0,0,0.024638292,0,0.062999837,0.03294438,0.138981912,4.00E-04,0.013967784,0,0.01372593,0,0.005406991,0.53,3118.051953,4308.368448,7426.420402,0.713921113,-1.31,-2.06,0.99,0.56,-0.36,1.51,-0.54,0.83,1.39,-0.42,-1.35,1.03,-0.32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018529704,0.036117392,CNV_LOW,0.542297565,-0.002030269,-0.016362319,-0.022462196,0.348272487,0.000405656,0,0.140145736,0.592598499,0.267255766,MSI-H,No,WT,WT,3.674088391,3.197452556,2.198912528,8.573647187,8.049848549,6.87036472,MSI-H,17.42424242,1.893939394,54.54545455,18.93939394,3.787878788,3.409090909,Blood_normal,a5ae9005-f7c0-4291-b288-df8380b29a04_gdc_realn.bam,f275ea09-8877-4159-bdee-d8781d22e740,Tumor,d29c7529-d7da-4ee6-b7c3-6af5083003af_gdc_realn.bam,37591282-8e97-4e53-82b4-13eca58214b8,Blood_normal,9c47ab95-7b62-4528-a9e5-b73bfe99dab3,Tumor,eb1be1de-2a5d-4ae4-a9bf-e62c34765a69,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTGGCC_S8_L008_R1_001.fastq.gz,420d5f06-1231-4f16-b090-9e27da0b3c83,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTGGCC_S8_L008_R2_001.fastq.gz,cc46aefa-d070-4531-b35b-69fef48f8fd4,Tumor,15cafcd9-5d44-4bd8-a421-7d5d7cc59aed,YES,PASS
+S092,C3N-01212,No,2,6,130N,C3N-01212-03,CPT0075380003,Tumor,No,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT3a (FIGO IIIA),pNX,cM0,Staging Incomplete,Stage III,IIIA,1,30.48,63,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Unifocal,1.5,Positive,45,Positive,40,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - positive;,Cannot be determined,2,0.00437853,0.057148988,0.113720547,0.023855996,0.084042172,0.003461457,7.00E-10,0.713392309,0.022408764,0,0.059210761,0.090537458,0,0.134138183,0.001489306,0.069264334,0,0,8.00E-04,0.05131453,0,0.074846217,0.034536967,0.060843228,0,0,0,0.108706228,0,0.041876884,0.75,2514.266274,5583.319319,8097.585593,0.593932274,-0.718,-0.65,1.72,1.96,-1.34,1.34,1.31,-1.14,0.34,1.52,0.02,1.06,0.43,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,4.76E-05,0.044778387,CNV_LOW,0.631934059,-0.015766433,-0.011803866,-0.009191827,0,0,0,0.199829128,0.658231952,0.14193892,MSI-H,No,MS_indel,Frame_Shift_Del,4.37949908,3.839223293,2.907188335,9.324180547,8.751544059,7.721099189,MSI-H,16.04651163,2.558139535,56.74418605,19.06976744,2.558139535,3.023255814,Blood_normal,de27373b-d06d-4246-bfbd-1fb88cc0f8fd_gdc_realn.bam,09d74eea-1860-4270-85dc-ce5bebbf6577,Tumor,26e50c35-a756-496a-a377-639add0619ee_gdc_realn.bam,9f929c14-586e-40d7-842c-7fcd83434081,Blood_normal,4bc8e960-760d-4cf1-b6b3-b52374645e4f,Tumor,aaddc8fa-2b31-41ff-8cbe-468c88441734,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTCCGC_S5_L008_R1_001.fastq.gz,77cb79fc-c379-49ee-a61a-861b047a8d63,Tumor,170905_UNC31-K00269_0079_AHLCF7BBXX_GTCCGC_S5_L008_R2_001.fastq.gz,70834e25-c655-4715-a7f8-57741576d457,Tumor,ff1a87b5-026f-4592-9e90-6a156e07dca5,YES,PASS
+S093,C3N-01217,No,2,7,130N,C3N-01217-01,CPT0076500003,Tumor,No,Ukraine,FIGO grade 1,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pNX,cM0,Staging Incomplete,Stage I,IB,0,36.2,58,No,NA,NA,Female,"Other, specify",Endometrium,Multifocal,0.8,Positive,80,Positive,85,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - positive;,Cannot be determined,None,0.005318682,0.104628581,0.090943407,0.063668402,0.084442983,0.002557199,1.81E-09,0.648440744,0,0,0.049541359,0.038721394,0,0.113462274,0,0,0.012461263,0,0,0.019183604,0.039610944,0,0.004096005,0.193189949,0,0.009557828,0.026629608,0.00227769,0.001268082,0,0.51,4181.890175,4241.500932,8423.391107,0.416725667,0.736,1.06,-1.37,-1.21,-0.49,-0.96,-1.1,-1.01,0.05,-1.03,0.17,-0.56,-0.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018036765,7.33E-05,CNV_LOW,0.030015236,0,-0.000473355,0,0.000333665,0.00034245,0.002322995,0.046631214,0.337676312,0.615692473,MSS,No,WT,WT,1.142367912,0.947697998,0.354607617,5.321928095,4.95419631,3.321928095,CNV_low,16.66666667,3.333333333,60,13.33333333,0,6.666666667,Blood_normal,b198fef8-363e-4231-97b0-2b31b0f928fa_gdc_realn.bam,c4fd71e5-acf9-4b52-a1bb-e1f757042066,Tumor,73ad916a-e94f-45f8-9ca7-3de307f3a46c_gdc_realn.bam,c549bd7c-0935-4276-a507-7b7fc9b54a60,Blood_normal,1a91f81d-f6e4-4ce3-9d38-0ab56f84767b,Tumor,9a74ea27-cda9-4e07-a710-db1b432ecf4e,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_GCCAAT_S22_L006_R1_001.fastq.gz,e60a5351-d5bd-4e4c-b9d2-64fcdca999b1,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_GCCAAT_S22_L006_R2_001.fastq.gz,03100748-7dc8-4f11-a46f-9153d3769556,Tumor,df38c029-6aec-4541-8a56-77c59e233226,YES,PASS
+S094,C3N-01219,No,3,12,128C,C3N-01219-03,CPT0076160003,Tumor,No,Ukraine,FIGO grade 3,under 50 %,Endometrioid,YES,Normal,pT3a (FIGO IIIA),pN0,cM0,Staging Incomplete,Stage III,IIIA,0,26.14,58,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Unifocal,5,Positive,90,Positive,80,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - patchy positive; Vimentin - negative;,Cannot be determined,1,0.007294917,0.035342991,0.160684601,0.052033662,0.076176778,0.009797105,2.82E-08,0.658669918,0,0.009145218,0.014336444,0.264904483,0,0.173192267,0.031665751,0.104941128,0,0.265176591,0,0.036010774,0,0.181109898,0.20547773,0.307954382,0.021208229,0,0.014877105,0,0,0,1.63,3607.302476,8247.18979,11854.49227,0.72200942,0.583,-0.458,-0.54,-0.03,-0.61,-0.24,0.26,-0.67,-0.19,-0.38,1.17,0.5,-0.62,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018047975,0.021422303,CNV_LOW,0.39358965,-0.003782297,-0.002169021,-0.00619722,0,0,0,0.312899462,0.57498839,0.112112148,MSI-H,No,MS_indel,Frame_Shift_Del,4.42387369,3.915204152,2.883172881,9.370687407,8.832890014,7.693486957,MSI-H,14.28571429,3.516483516,58.68131868,18.46153846,3.296703297,1.758241758,Blood_normal,d4c44cff-6bfd-46e0-a542-426345f7afff_gdc_realn.bam,6b91773f-2a59-4a80-ac8c-f7bd3a92755d,Tumor,14e343cd-1aff-4e8b-8bd8-7c4813ec2405_gdc_realn.bam,8f2fb70a-9111-4662-b07b-937b3880b9f2,Blood_normal,130cc23b-3c75-406e-b2e3-6c762c8dcfc7,Tumor,8fc2d44a-4784-476b-83f4-d860c3e7ef4b,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_ACAGTG_S23_L006_R1_001.fastq.gz,e20212d2-8826-470e-8ea9-fd482194e637,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_ACAGTG_S23_L006_R2_001.fastq.gz,4411b385-dfc9-42a7-b200-a2ef23950c1e,Tumor,13797ccd-aaba-4224-ac1c-c5aff52d21fe,YES,PASS
+S095,C3N-01267,No,3,10,129C,C3N-01267-03,CPT0027970003,Tumor,No,Ukraine,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT1b (FIGO IB),pN1 (FIGO IIIC1),cM0,Staging Incomplete,Stage III,IIIC1,1,30.85,57,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Multifocal,1.2,Positive,80,Positive,90,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - positive;,Cannot be determined,2,0.003797846,0.077860922,0.091939368,0.035186068,0.178905676,0.002576266,1.90E-08,0.609733834,0,0.0019259,0.012271592,0.034484496,0,0.208514252,0,0,0,0,0.011965047,0.02334588,0.006748859,0,0.007875625,0.162633253,0,0.00602792,0.036807022,0,0.007400155,0,0.52,4265.396681,4751.649418,9017.046099,0.334574905,-0.248,-0.48,-0.11,-0.56,-1.1,-0.32,-1.16,-0.68,1.33,-0.91,-0.97,-0.82,0.55,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.018025661,0.251575936,CNV_LOW,2.141732857,0,-0.079315575,-0.081189645,0.410300448,0,0,0.096244666,0.359083985,0.544671348,MSS,No,WT,WT,0.993284573,0.970671335,0.043988012,5.044394119,5,1,CNV_low,9.677419355,12.90322581,51.61290323,16.12903226,0,9.677419355,Blood_normal,ef8eadff-62be-4e36-a2d3-9d4a618b2ff2_gdc_realn.bam,1042e2b7-3bc3-4b8d-8437-bd7bfdf5d5d4,Tumor,441ce217-806a-46cc-84fd-6391b0ba2d82_gdc_realn.bam,fc5809c5-56de-45d7-8805-0ef944f75972,Blood_normal,a532762d-a05a-4be3-911a-21a9c44c1c5c,Tumor,6a16e63f-e0cb-4ff2-bc69-f077232f3b2c,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_ATGTCA_S18_L005_R1_001.fastq.gz,63431135-a82e-4a29-a17d-70ca5b7def70,Tumor,170912_UNC31-K00269_0081_AHL3YKBBXX_ATGTCA_S18_L005_R2_001.fastq.gz,a09574f4-c3e1-428c-a8a0-412950a3872a,Tumor,377ea8b8-844d-4667-be85-63165095a652,YES,PASS
+S096,C3N-01346,No,4,13,127N,"C3N-01346-03,C3N-01346-04",CPT0116550004,Tumor,No,Poland,NA,50 % or more,Serous,YES,Normal,pT1b (FIGO IB),pN2 (FIGO IIIC2),cM0,No pathologic evidence of distant metastasis,Stage III,IIIC2,1,34,63,NA,NA,NA,Female,Anterior endometrium,NA,Unifocal,5.5,Positive,5,Positive,10,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,"Ki67 positive 90%, Vimentin positive - 5%, SMA negative, CD10 - negative, miogenine - negative, desmine - negative, S-100 - negative",Cannot be determined,1,0.006765475,0.12302872,0.108917746,0.031112939,0.085797165,0.00826782,7.83E-09,0.636110127,0.003670969,0,0.029677669,0.085321157,0,0.14099011,0.002679818,0.081837455,0.025438194,0,0,0.029795527,0.015829072,0.101667828,0.088930214,0.26555035,0.019213128,0,0.012959135,0,0.016439373,0,0.92,4617.267994,7726.246196,12343.51419,0.832173788,-0.246,-3.06,-0.44,0.62,1.02,-1.4,-0.05,-0.34,0.31,-0.9,-0.1,0.04,-1.36,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.273023239,0.073642226,CNV_HIGH,3.902728214,-0.001980205,-0.001974315,-0.31732797,0.092649243,0.426536847,0.011365024,NA,NA,NA,MSS,No,WT,WT,1.258934249,1.162461482,0.207641466,5.523561956,5.357552005,2.584962501,CNV_high,10,25,32.5,12.5,10,10,Blood_normal,3db17e2b-7fc3-4dc1-9fa9-e0859db32a3e_wxs_gdc_realn.bam,1a0e6a0f-57d5-4378-80a4-d124eb1a8779,Tumor,c045de53-714c-4f81-8f6a-333f80db7d72_wxs_gdc_realn.bam,57d796cc-0636-4958-937c-4f8bdb46ebbe,Blood_normal,0149a1bf-30e0-41c3-994d-70bb82818c4d,Tumor,1a0c3a89-4547-4e37-aede-265ae25de36f,Tumor,180502_UNC32-K00270_0094_BHTVLMBBXX_CAGATC_S11_L003_R1_001.fastq.gz,e471cc71-0f0b-421b-a1fb-40136d957698,Tumor,180502_UNC32-K00270_0094_BHTVLMBBXX_CAGATC_S11_L003_R2_001.fastq.gz,05f8cd15-4e8b-4bc0-abb9-86024f30587e,Tumor,b3fa7771-1083-4451-a72f-648e12f99fb6,NA,NA
+S097,C3N-01349,No,4,14,127N,C3N-01349-02,CPT0116630003,Tumor,No,Poland,NA,50 % or more,Serous,YES,Normal,pT1b (FIGO IB),pN0,cM0,No pathologic evidence of distant metastasis,Stage I,IB,1,31,77,Yes,NA,NA,Female,Anterior endometrium,NA,Multifocal,5,Negative,NA,Negative,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Overexpression,NA,Cannot be determined,4 or more,0.009739342,0.037214567,0.111071052,0.026449078,0.084138822,0.010901218,8.09E-10,0.720485921,0,0.040723911,0.026971786,0.063511306,0,0.031092652,0,0.036809347,0.017676251,0.004317008,0,0.066840911,0,0.207247911,0.052855743,0.176413766,0,0.029581456,0,0.01149725,0,0.044460701,0.81,3784.055606,7310.646586,11094.70219,0.740758872,-0.808,-2.1,-0.03,1.41,1.98,-0.27,1.42,-0.4,-0.34,1.38,1.91,-1.52,-0.64,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0.199043404,0.126832043,CNV_HIGH,3.377673268,-0.016427767,-0.004073222,-0.028878096,0.151209284,0.176426135,0.001457121,NA,NA,NA,MSS,No,WT,WT,2.066555769,2.023260846,0.168435383,6.700439718,6.64385619,2.321928095,CNV_high,13.13131313,14.14141414,48.48484848,9.090909091,8.080808081,7.070707071,Blood_normal,567a10ad-cdde-4eb0-8466-fc347bebfc3d_wxs_gdc_realn.bam,529af22f-6b99-4f52-8adf-7d13636e3990,Tumor,22644f97-481a-4e62-a318-5f5e77b80855_wxs_gdc_realn.bam,c94b89bc-2589-4dbc-b42c-53dbd252d63b,Blood_normal,6207248b-6334-4d87-a3fe-328bd55629f8,Tumor,fc1377b1-042b-4c9d-91a4-03028f920461,Tumor,180502_UNC32-K00270_0094_BHTVLMBBXX_ACTTGA_S10_L003_R1_001.fastq.gz,61da9b88-7482-4b7a-8e66-275ecf12ceaf,Tumor,180502_UNC32-K00270_0094_BHTVLMBBXX_ACTTGA_S10_L003_R2_001.fastq.gz,c23d4640-ab47-475d-9805-92cabdd16f22,Tumor,de22a6aa-fd4b-49bf-9177-5839c2db4c1c,NA,NA
+S098,C3N-01510,No,4,14,131,C3N-01510-02,CPT0079650003,Tumor,No,United States,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT2 (FIGO II),pNX,Staging Incomplete,Staging Incomplete,Stage II,II,1,40.72,53,Yes,White,Not-Hispanic or Latino,Female,"Other, specify",Bulky tumor involving both anterior and posterior walls,Multifocal,8.5,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,None,0.004837211,0.024165291,0.089824854,0.043054277,0.06940191,0.002293855,3.05E-09,0.766422598,0,0,0.019304946,0.073887376,0,0.074485955,0,0.045953357,0,0,0.003062666,0.002659192,0.003415035,0.01575249,0.00889596,0.137475272,0,0.012682978,0,0.037907652,0.002005813,0.022511308,0.46,1445.656243,4373.982627,5819.638871,0.518388581,-0.0498,-1.21,0.47,3.42,-1.16,1.15,0.59,-0.53,-0.15,1.06,-0.93,1.48,0.63,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,3.64E-06,0.000132588,CNV_LOW,0.332383257,-0.009007688,-0.00690175,-0.00851741,0,0.001213689,0,0.092486411,0.753031126,0.154482463,MSS,No,WT,WT,3.341007532,3.2871191,0.455772534,8.209453366,8.14974712,3.700439718,CNV_low,2.473498233,1.413427562,94.34628975,0.706713781,0.706713781,0.35335689,Blood_normal,f8e5df28-e72a-43d1-867f-2774935b9d1b_gdc_realn.bam,1b6a1313-bd3c-44c1-a4e6-379bee72ac96,Tumor,6071b625-57fb-4113-9926-83e265a78d28_gdc_realn.bam,b9b9dea4-2021-4f06-8697-048fa9d1baa0,Blood_normal,5ef81d6b-2d4a-4ed8-960b-eea9cf8c3598,Tumor,f8c5708f-f692-4381-92ea-4dcf5a970d14,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_AGTCAA_S15_L004_R1_001.fastq.gz,9c598e26-c32b-4405-af2a-c560941fb81a,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_AGTCAA_S15_L004_R2_001.fastq.gz,086afb47-3e44-4075-900f-c9efeb622959,Tumor,d638a8d7-7f93-4497-be48-921ede96da0b,YES,PASS
+S099,C3N-01520,No,3,12,129N,C3N-01520-01,CPT0076860003,Tumor,No,Ukraine,FIGO grade 2,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,29.37,69,No,NA,NA,Female,"Other, specify",Endometrium,Multifocal,1,Positive,60,Positive,65,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentin - patchy positive;,Cannot be determined,2,0.007382878,0.035083294,0.133624962,0.04943904,0.077794978,0.003529352,9.11E-08,0.693145404,0,0,0.058648319,0.066290344,0,0.098306727,0.065220541,0.063741986,0,0.070019705,0,0.027449986,0,0.063996724,0.059493274,0.158603661,0,0,0,0.049773167,0,0.008455565,0.79,2659.994806,6104.870732,8764.865538,0.634661788,-0.155,0.856,0.03,0.6,-0.9,-0.2,0.27,1.26,-1.17,-0.43,0.29,0.35,0.57,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0.075011161,CNV_LOW,0.91735067,-0.019652702,-0.021330753,-0.018841793,0,0,0,0.200823898,0.714476676,0.084699426,MSI-H,No,WT,Frame_Shift_Ins_Nonsense_Mutation,6.863169791,6.7573255,3.207157908,11.86457308,11.75780667,8.060695932,MSI-H,23.22357019,2.253032929,53.43731947,16.926632,1.964182553,2.195262854,Blood_normal,22d8111e-ed85-442f-a8ff-2bc44792e155_gdc_realn.bam,76483f05-7126-45ca-ada5-39b1d8af487c,Tumor,7dea6331-0e16-40e5-b908-e9b403f848f6_gdc_realn.bam,03c88b74-e1bd-4a90-ac3d-2fbd9bdfaa2b,Blood_normal,021c018b-52e5-4d30-86e0-a0758b6eb859,Tumor,d4f7f077-eadb-471d-8ab2-dcc8035aa087,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_GCCAAT_S2_L007_R1_001.fastq.gz,a43b1b15-3b59-4e11-b589-8d89b7e63209,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_GCCAAT_S2_L007_R2_001.fastq.gz,b3135f8c-5c50-41a8-ae39-a427c93a993f,NA,NA,YES,PASS
+S100,C3N-01521,No,4,14,128C,C3N-01521-01,CPT0076680003,Tumor,No,Ukraine,FIGO grade 3,under 50 %,Endometrioid,YES,Normal,pT1a (FIGO IA),pNX,cM0,Staging Incomplete,Stage I,IA,0,29.4,75,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Unifocal,4.2,Positive,60,Positive,80,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - negative; Vimentin - patchy positive;,Cannot be determined,2,0.004994598,0.035242686,0.085099802,0.031691155,0.098499307,0.005015088,2.82E-07,0.739457081,0.01763236,0,0.007080589,0.06592389,0,0.071179987,0,0,0.02562487,0,0.034393768,0.008318946,0.011821109,0.063638898,0.042764415,0.095607114,0,0.043358278,0.083143647,0,0,0.019512128,0.59,2795.977059,5789.410018,8585.387077,0.75823875,-0.37,-0.451,-0.18,0.87,2.7,-0.82,0,-0.07,-1.24,-0.11,0.29,-0.04,-0.73,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.289753048,0.329561071,CNV_HIGH,5.501272687,0,0,-0.076203323,0.260744145,0.242258634,0.01483176,0.248159781,0.544443077,0.207397142,MSS,No,WT,WT,0.993284573,0.947697998,0.086674411,5.044394119,4.95419631,1.584962501,CNV_high,6.666666667,16.66666667,50,13.33333333,6.666666667,6.666666667,Blood_normal,ac16a9c0-e9f4-4bdf-ba76-78d9438f0b10_gdc_realn.bam,c670d6d3-0ad3-481c-9336-20d351ffc7dc,Tumor,ff6a7b50-074e-4229-a5b1-282d7e4c9208_gdc_realn.bam,1c684370-d3c3-4bf9-82df-6cd1a2b107e2,Blood_normal,de536564-ee0b-471a-a157-203f5b67fb71,Tumor,2c1c0ba9-0049-4e08-adbd-173e7d68433d,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_ACAGTG_S3_L007_R1_001.fastq.gz,0acc093f-71f7-4f80-a9b2-20e017cbd7e4,Tumor,170830_UNC31-K00269_0078_AHLCVMBBXX_ACAGTG_S3_L007_R2_001.fastq.gz,1b05444d-acbf-4aa3-9981-0803c58db907,Tumor,740061a5-ffd5-4640-b888-aae0fae0779e,YES,PASS
+S101,C3N-01537,No,4,13,129C,C3N-01537-01,CPT0097010003,Tumor,No,Ukraine,FIGO grade 2,50 % or more,Endometrioid,YES,Normal,pT2 (FIGO II),pN0,cM0,Staging Incomplete,Stage II,II,0,35.42,74,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Unifocal,1.5,Positive,80,Positive,20,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cytokeratin 7 - positive; Vimentin - negative;,Cannot be determined,1,0.014872008,0.036505622,0.194267542,0.033524424,0.064904537,0.010912287,1.49E-09,0.645013579,0.045215956,0.00180072,0.024315518,0.143705494,0,0.239386292,0.049980042,0.139580503,0,0.029759765,0,0.047962053,0,0.166271953,0.207608597,0.390061502,0,0,0.004351604,0,0,0,1.49,4336.999577,9113.414303,13450.41388,0.769073842,-0.318,-1.64,-0.61,-0.97,2.09,-0.24,1.33,-0.51,-0.88,0.53,1.81,-1.75,0.64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.171819961,0.085911472,CNV_HIGH,1.556572742,-0.021722017,0,-0.027478768,0.141450915,0.083155192,0.001175305,0.335899561,0.330951988,0.333148452,MSS,No,WT,WT,1.05907234,0.924352909,0.245810227,5.169925001,4.906890596,2.807354922,CNV_high,17.24137931,13.79310345,58.62068966,3.448275862,3.448275862,3.448275862,Blood_normal,ef01edb4-ba7c-427e-a2b0-d5099f8d06fb_wxs_gdc_realn.bam,07b497aa-3dd9-433a-b3fc-1d6ff9c23806,Tumor,af300af0-16e5-424e-8f44-acb85517ac35_wxs_gdc_realn.bam,8896b39a-df49-48b1-97f6-2573ecef6b08,Blood_normal,809337b5-5049-4bb0-942f-b85a59d52743,Tumor,1c4ad51e-2c49-44ce-855e-8601191e51f1,Tumor,171107_UNC32-K00270_0063_BHLNTCBBXX_CAGATC_S60_L007_R1_001.fastq.gz,c00f784d-3143-4e88-b652-40a42aa38f53,Tumor,171107_UNC32-K00270_0063_BHLNTCBBXX_CAGATC_S60_L007_R2_001.fastq.gz,53445ea6-98ca-4134-a8bb-cd64497935a7,Tumor,5152f497-62fb-48b5-bdbd-37053a905ff5,YES,PASS
+S102,C3N-01802,No,4,16,128C,C3N-01802-01,CPT0080030003,Tumor,No,United States,NA,under 50 %,Serous,YES,Normal,pT2 (FIGO II),pN0,Staging Incomplete,Staging Incomplete,Stage II,II,1,24.32,85,Yes,Black or African American,Not-Hispanic or Latino,Female,"Other, specify",entire uterine cavity,Unifocal,3.8,Cannot be determined,NA,Cannot be determined,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,NA,Cannot be determined,1,0.009022295,0.056699664,0.22202567,5.12E-08,0.069009999,0.008491865,4.80E-09,0.63475045,0.003029268,0,0.038323966,0.24395747,0,0.125632435,0.275833718,0,0,0,0.065210688,0,0.023056066,0.056808071,0.099728201,0.145488511,0.00549566,0,0,0.058770763,0.021394706,0.047270477,1.21,4822.4358,8568.337599,13390.7734,0.599249078,-0.403,-2.9,2.45,0.88,0.41,1.7,3.31,-0.44,-0.1,2.82,2.03,1.53,-0.99,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.188981339,0.111791982,CNV_HIGH,3.07327003,-0.068690488,-0.000314798,-0.246613392,0.118123338,0.163937849,0,0.365215471,0.443528669,0.19125586,MSS,No,WT,WT,1.576001633,1.417826394,0.389123498,6.022367813,5.781359714,3.459431619,CNV_high,24.07407407,5.555555556,57.40740741,5.555555556,1.851851852,5.555555556,Blood_normal,f5ae7b0e-9213-4424-a10e-7f1383d33b5b_gdc_realn.bam,b16d3cc5-43af-4b25-ab64-b262492b6e42,Tumor,ef72c57f-f998-4e9a-a563-b5969a93bcd8_gdc_realn.bam,00527d6d-5b39-4a0e-a36a-30a88d5601ae,Blood_normal,68ed2c5e-6ac7-4305-b273-d1c649e89954,Tumor,95d70287-457f-4f51-80b5-25d771f72a64,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_CCGTCC_S16_L004_R1_001.fastq.gz,06944ccc-05d4-42d9-917f-3e3e86355149,Tumor,170906_UNC32-K00270_0058_BHLJ7FBBXX_CCGTCC_S16_L004_R2_001.fastq.gz,6820bed8-165e-49af-bfeb-3cd127060651,Tumor,2533a15d-a790-4a3f-868a-fd30f6329dc8,YES,PASS
+S103,C3N-01825,No,3,10,129N,C3N-01825-01,CPT0097710003,Tumor,No,Ukraine,NA,under 50 %,Serous,YES,Normal,pT1a (FIGO IA),pN0,cM0,Staging Incomplete,Stage I,IA,0,34.06,70,No,NA,NA,Female,"Other, specify",Entire uterine cavity,Unifocal,5,Negative,NA,Negative,NA,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,Cannot be determined,CK7 - positive; Vimentine - negative;,Cannot be determined,Unknown,0.004767503,0.237164751,0.089595072,0.028463459,0.184282336,0.004165019,7.00E-09,0.451561854,0.058010582,0,0.064580904,0.030642108,0,0.187681071,0,0.05056075,0,6.00E-04,0,0.003258252,0,0.430658443,0.064693237,0.22904602,0,0,0.042631692,0.005492839,0,0.022128812,1.19,5933.572259,6081.794155,12015.36641,0.465887297,-0.867,-1.42,-0.51,-0.72,0.28,-0.62,0.19,-1,1.75,0.63,0.31,-1.65,-0.35,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.156151631,0.131671202,CNV_HIGH,0.771704727,0,-0.015492594,-0.004088097,0.025331244,0.075747549,0,0.250298316,0.16252204,0.587179644,MSS,No,WT,WT,1.05907234,1.037474705,0.043988012,5.169925001,5.129283017,1,CNV_high,23.52941176,5.882352941,58.82352941,11.76470588,0,0,Blood_normal,e85c33bd-3b8d-4bad-9735-e48f81491019_wxs_gdc_realn.bam,bb985682-1aad-4aa0-acd4-039b2e5f2b49,Tumor,3ec666f3-df1c-438d-bcc1-555e4c7db92b_wxs_gdc_realn.bam,e079441f-c166-44ee-b53b-33a6f6fcff85,Blood_normal,522582a7-24d6-4b9a-9e6a-5116e3ca5c72,Tumor,eb0e4fbc-5442-43f8-ae0e-4d8d6cb67f59,Tumor,171107_UNC32-K00270_0063_BHLNTCBBXX_TAGCTT_S56_L006_R1_001.fastq.gz,fb6e7290-6e05-40fd-b122-340633db8e34,Tumor,171107_UNC32-K00270_0063_BHLNTCBBXX_TAGCTT_S56_L006_R2_001.fastq.gz,ee34816a-0060-45f6-9813-334207970349,Tumor,4190843f-c561-452f-bffb-f78e62131558,YES,PASS
+S104,C3N-01825_replication,Yes,4,16,131,C3N-01825-03,CPT0097730003,Tumor,No,Ukraine,NA,under 50 %,Serous,YES,Normal,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.449,-1.35,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S105,C3L-00006,No,2,5,128C,C3L-00006-06,CPT0001470001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.134372832,0.393,-0.0215,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180313_UNC32-K00270_0085_BHT2MGBBXX_CCGTCC_S39_L007_R1_001.fastq.gz,caea5e0c-f3be-4db4-8837-c3f5d89ac4a6,Adjacent_normal,180313_UNC32-K00270_0085_BHT2MGBBXX_CCGTCC_S39_L007_R2_001.fastq.gz,d5b2b761-e82a-4829-8aab-a78d87cde2a1,Adjacent_normal,3639fff9-4be4-4787-8565-24da63af1595,NA,NA
+S106,C3L-00361,No,1,2,127N,C3L-00361-06,CPT0002450001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.060639595,0.39,1.18,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_TTAGGC_S5_L002_R1_001.fastq.gz,2bb0bf8f-ed23-4cc6-aaa6-f36f1d0811ab,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_TTAGGC_S5_L002_R2_001.fastq.gz,6c30343e-37cb-40c2-898e-44c3125b5afd,Adjacent_normal,30f27a74-757b-4ac8-a2e8-51c36b79c09f,NA,NA
+S107,C3L-00586,No,2,6,129N,C3L-00586-06,CPT0023640001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.021185693,0.138,0.268,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_GCCAAT_S7_L002_R1_001.fastq.gz,52d135b5-6065-44c4-abc4-8245ce9694dd,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_GCCAAT_S7_L002_R2_001.fastq.gz,9509bda6-905c-4583-b31a-debc0d5afc82,Adjacent_normal,736ecc5f-7d9b-4179-bb05-42a48e26d29d,NA,NA
+S108,C3L-00601,No,2,6,127N,C3L-00601-06,CPT0007660001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.097223501,1.48,0.096,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_ACAGTG_S1_L002_R1_001.fastq.gz,15a20d32-beec-4388-9408-480db8a1ae5c,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_ACAGTG_S1_L002_R2_001.fastq.gz,0a6f5a3c-53aa-41bc-8c68-7292e1e6b059,Adjacent_normal,a370a813-54eb-4977-964c-8da43f070efc,NA,NA
+S109,C3L-00769,No,2,5,129C,C3L-00769-02,CPT0026540001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.613,0.95,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S110,C3L-00930,No,2,7,129C,C3L-00930-02,CPT0027020001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.672,-0.879,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S111,C3L-00932,No,2,8,128N,C3L-00932-02,CPT0027130001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.163867753,0.736,1.19,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_ACAGTG_S1_L001_R1_001.fastq.gz,db54b077-a421-4528-9067-5fdc52ee5366,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_ACAGTG_S1_L001_R2_001.fastq.gz,1d2625d3-cb72-4722-aee4-b522a30139e0,Adjacent_normal,b5b182e1-0ebf-461b-a8d2-82877ff266a8,NA,NA
+S112,C3L-00947,No,1,1,129C,C3L-00947-02,CPT0027430001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.13,0.787,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S113,C3L-00963,No,2,8,129N,C3L-00963-02,CPT0016530001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.05,0.478,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S114,C3L-01246,No,2,7,127C,C3L-01246-02,CPT0080990001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.961,1.14,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S115,C3L-01249,No,1,2,128N,C3L-01249-02,CPT0080890001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.127,0.481,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S116,C3L-01252,No,1,1,128N,C3L-01252-02,CPT0080620001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.0702,0.19,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S117,C3L-01256,No,1,3,129N,C3L-01256-02,CPT0073460001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.39,1.18,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S118,C3L-01257,No,2,7,128C,C3L-01257-02,CPT0073530001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.05,1.31,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S119,C3L-01282,No,1,4,128N,C3L-01282-06,CPT0077660001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.182616031,1.12,0.835,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_CGATGT_S10_L003_R1_001.fastq.gz,b75aa815-18fc-4f67-8bd8-e80d70bf58d7,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_CGATGT_S10_L003_R2_001.fastq.gz,6c986f25-4352-4b8d-9f37-2de28762849a,Adjacent_normal,0041d301-1307-4d69-80ba-66be7f48afb6,NA,NA
+S120,C3L-01304,No,1,4,129N,C3L-01304-06,CPT0064240001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.149158878,0.197,0.867,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_CAGATC_S3_L001_R1_001.fastq.gz,fb12c9f3-e9e4-4234-b1ea-b652ea9e064d,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_CAGATC_S3_L001_R2_001.fastq.gz,9272b657-6b27-4628-adff-898271064f68,Adjacent_normal,40da7b55-0225-4641-ac4a-27630855d845,NA,NA
+S121,C3L-01307,No,1,4,127N,C3L-01307-06,CPT0064320001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.086623051,0.622,0.975,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_ACTTGA_S2_L001_R1_001.fastq.gz,636ef258-acbd-443f-99c4-8966ebf303af,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_ACTTGA_S2_L001_R2_001.fastq.gz,8b68efdb-23b2-4936-8426-1d26933f64a5,Adjacent_normal,44d9a77d-4a1e-4130-9bc2-4d82107db6d0,NA,NA
+S122,C3L-01311,No,1,3,128C,C3L-01311-06,CPT0077780001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.105167881,0.815,1.69,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_TGACCA_S6_L002_R1_001.fastq.gz,46a9c137-590c-40f8-83a9-848a8d0437a5,Adjacent_normal,180109_UNC31-K00269_0107_AHNHY2BBXX_TGACCA_S6_L002_R2_001.fastq.gz,7bcd0306-0e63-4202-88b1-1ed2497e3547,Adjacent_normal,9e327c15-9d40-4c45-8375-748e81c21667,NA,NA
+S123,C3L-01744,No,1,3,127C,C3L-01744-02,CPT0093290000,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.155,0.489,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S124,C3N-00333,No,2,5,127C,C3N-00333-07,CPT0011020003,Adjacent_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.080335244,0.198,-0.236,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,171215_UNC32-K00270_0073_BHN3NFBBXX_CCGTCC_S5_L002_R1_001.fastq.gz,3b0cb9d5-e735-43db-81bc-dd41c5404e88,Adjacent_normal,171215_UNC32-K00270_0073_BHN3NFBBXX_CCGTCC_S5_L002_R2_001.fastq.gz,2e65e11f-f0c6-43e1-a97f-42beb6bcf2e9,Adjacent_normal,14917423-6bba-484c-b97b-b754ee072eb7,NA,NA
+S125,C3N-00383,No,1,2,129C,C3N-00383-06,CPT0022850001,Adjacent_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.155400656,0.232,0.411,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180313_UNC32-K00270_0085_BHT2MGBBXX_AGTCAA_S42_L008_R1_001.fastq.gz,d8fdcaab-a8eb-452b-85d3-4e8342a5aa06,Adjacent_normal,180313_UNC32-K00270_0085_BHT2MGBBXX_AGTCAA_S42_L008_R2_001.fastq.gz,edbf898c-dc38-48e4-b61b-09f11115d61e,Adjacent_normal,c8f62f3e-d2b3-4db8-951d-01ce4207c713,NA,NA
+S126,C3N-00729,No,2,6,128N,C3N-00729-06,CPT0064950001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.917,-0.00848,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S127,C3N-00858,No,1,1,127C,C3N-00858-05,CPT0078360001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.022841139,0.865,0.358,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180313_UNC32-K00270_0085_BHT2MGBBXX_CTTGTA_S41_L008_R1_001.fastq.gz,2bb35dfa-8915-4f44-a7f3-309bee055f37,Adjacent_normal,180313_UNC32-K00270_0085_BHT2MGBBXX_CTTGTA_S41_L008_R2_001.fastq.gz,c3a727a2-3121-46f4-8640-749db0e909dc,Adjacent_normal,bf011895-372c-4cca-b040-7bbc29d41958,NA,NA
+S128,C3N-00866,No,2,8,127C,C3N-00866-03,CPT0063110001,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.115322032,1.22,-0.172,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,171205_UNC32-K00270_0069_BHN2HGBBXX_ATCACG_S17_L005_R1_001.fastq.gz,af76a34d-cc47-4d15-a276-60e3923b3975,Adjacent_normal,171205_UNC32-K00270_0069_BHN2HGBBXX_ATCACG_S17_L005_R2_001.fastq.gz,335d84c6-609f-4669-ae8c-2242a24a4fa8,Adjacent_normal,6eb569d3-8227-4a98-b7c1-16f8f6a10b22,NA,NA
+S129,C3N-01346,No,4,13,127C,C3N-01346-05,CPT0116570003,Adjacent_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,0.689,0.554,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Adjacent_normal,180516_UNC32-K00270_0095_AHTY5LBBXX_CGTACG_S5_L008_R1_001.fastq.gz,7f559a17-edd2-4465-aee8-61684540c52e,Adjacent_normal,180516_UNC32-K00270_0095_AHTY5LBBXX_CGTACG_S5_L008_R2_001.fastq.gz,a2d046e8-2efb-4587-8ac7-fb8e6e340b40,Adjacent_normal,aa96ca57-2981-45e4-9ad5-500df569e4f7,NA,NA
+S130,C3L-00563,No,3,11,128C,C3L-00563-06,CPT0063260001,Myometrium_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.847,0.347,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S131,C3L-00605,No,3,12,127C,C3L-00605-06,CPT0063900001,Myometrium_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.839,0.802,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S132,C3L-00770,No,3,9,127C,C3L-00770-02,CPT0026660001,Myometrium_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.412,0.817,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S133,C3L-00771,No,3,11,127C,C3L-00771-02,CPT0026780001,Myometrium_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.368,-0.882,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S134,C3N-00200,No,3,9,128C,C3N-00200-06,CPT0017930001,Myometrium_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.176,0.404,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S135,C3N-01211,No,3,10,127C,C3N-01211-06,CPT0075940001,Myometrium_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.248,-0.387,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S136,NX1,No,4,13,128N,C3N-03691-01,"CPT0189160002,CPT0189160003",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.526,1.92,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S137,NX2,No,4,13,128C,"C3N-03411-01,C3N-03411-02","CPT0182950002,CPT0182950003,CPT0182960002,CPT0182960003",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.364,-1.23,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S138,NX3,No,4,14,127C,"C3N-03320-01,C3N-03320-03","CPT0182890003,CPT0182910002,CPT0182910003,CPT0182910004,CPT0182910005",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.53,1.14,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S139,NX4,No,4,14,128N,"C3N-03692-01,C3L-03602-02,C3L-03602-03,C3L-03563-01,C3L-03567-01","CPT0191260002,CPT0191260003,CPT0191270002,CPT0191270003,CPT0191270004,CPT0189520005,CPT0186270003,CPT0186250002",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.935,0.771,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S140,NX5,No,4,15,127C,"C3N-03692-01,C3N-03692-02,C3N-03692-03","CPT0189520002,CPT0189520003,CPT0189520004,CPT0189530004,CPT0189540002,CPT0189540003,CPT0189540004",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.672,-0.52,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S141,NX6,No,4,15,128N,"C3N-03691-02,C3N-03691-03","CPT0189170002,CPT0189170003,CPT0189180002,CPT0189180003",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.212,1.28,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S142,NX7,No,4,15,131,"C3N-03412-02,C3N-03413-01,C3N-03413-03,C3L-03564-01,C3N-03692-02","CPT0183020002,CPT0183050002,CPT0183050003,CPT0183070002,CPT0186240002,CPT0189530002,CPT0189530003",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.29,0.426,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S143,NX8,No,4,16,127C,"C3L-03601-01,C3L-03601-02","CPT0191190002,CPT0191190003,CPT0191200002,CPT0191200003",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.0845,0.767,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S144,NX9,No,4,16,128N,C3L-03601-03,"CPT0191210002,CPT0191210003,CPT0191210004",Enriched_normal,Yes,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.4,-0.53,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S145,NX10,No,5,17,129N,C3N-03693-03,"CPT0229850002,CPT0229850003",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.467,-0.308,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S146,NX11,No,5,17,130C,C3N-03694-01,"CPT0229890002,CPT0229890003,CPT0229890004",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.87,1.16,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S147,NX12,No,5,17,127N,"C3N-03695-01,C3N-03695-03","CPT0229950002,CPT0229950003,CPT0229970002,CPT0229970003",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.289,0.573,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S148,NX13,No,5,17,128N,C3N-03697-03,"CPT0230030002,CPT0230030003,CPT0230030004",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.709,1.05,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S149,NX14,No,5,17,128C,"C3N-03699-01,C3N-03699-02","CPT0230130002,CPT0230130003,CPT0230130004,CPT0230140002,CPT0230140003,CPT0230140004",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.215,-0.0045,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S150,NX15,No,5,17,131,C3N-03700-01,"CPT0230190002,CPT0230190003,CPT0230190004",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.558,0.614,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S151,NX16,No,5,17,129C,"C3N-03701-01,C3N-03701-02,C3N-03701-03","CPT0230250002,CPT0230260002,CPT0230270002,CPT0230270003,CPT0230270004",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.139,-0.435,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S152,NX17,No,5,17,127C,"C3N-03706-01,C3N-03706-02,C3N-03706-03","CPT0230400002,CPT0230400003,CPT0230400004,CPT0230410002,CPT0230410003,CPT0230410004,CPT0230420002,CPT0230420003,CPT0230420004",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.991,-0.874,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+S153,NX18,No,5,17,130N,"C3N-03708-01,C3N-03708-02,C3N-03708-03","CPT0230460002,CPT0230460003,CPT0230460004,CPT0230470002,CPT0230470003,CPT0230470004,CPT0230480002,CPT0230480003,CPT0230480004",Enriched_normal,No,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0.85,0.931,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/examples/datasets/Dou-ucec-ground-truth.csv b/examples/datasets/Dou-ucec-ground-truth.csv
new file mode 100644
index 00000000..888ceed0
--- /dev/null
+++ b/examples/datasets/Dou-ucec-ground-truth.csv
@@ -0,0 +1,180 @@
+source,target
+idx,
+Proteomics_Participant_ID,Case_id
+Case_excluded,Case_excluded
+Proteomics_TMT_batch,Batch
+Proteomics_TMT_plex,Plex
+Proteomics_TMT_channel,ReporterName
+Proteomics_Parent_Sample_IDs,
+Proteomics_Aliquot_ID,Aliquot_ID
+Proteomics_Tumor_Normal,Group
+Proteomics_OCT,
+Country,Participant_country
+Histologic_Grade_FIGO,Histologic_grade
+Myometrial_invasion_Specify,"Myometrial_invasion,Myometrial_invasion_present_specify"
+Histologic_type,Histologic_Type
+Treatment_naive,Cancer_history_history_of_any_treatment
+Tumor_purity,ABSOLUTE_tumor_purity
+Path_Stage_Primary_Tumor-pT,Pathologic_staging_primary_tumor_pt
+Path_Stage_Reg_Lymph_Nodes-pN,Pathologic_staging_regional_lymph_nodes_pn
+Clin_Stage_Dist_Mets-cM,Clinical_staging_distant_metastasis_cm
+Path_Stage_Dist_Mets-pM,
+tumor_Stage-Pathological,Tumor_stage_pathological
+FIGO_stage,
+LVSI,
+BMI,BMI
+Age,Age
+Diabetes,Diabetes
+Race,Race
+Ethnicity,Ethnicity
+Gender,Sex
+Tumor_Site,Tumor_site
+Tumor_Site_Other,Tumor_site_other
+Tumor_Focality,Tumor_focality
+Tumor_Size_cm,Tumor_size_cm
+Estrogen_Receptor,Ancillary_studies_estrogen_receptor
+Estrogen_Receptor_%,
+Progesterone_Receptor,Ancillary_studies_progesterone_receptor
+Progesterone_Receptor_%,
+MLH1,Ancillary_studies_mlh1
+MLH2,
+MSH6,Ancillary_studies_msh6
+PMS2,Ancillary_studies_pms2
+p53,Ancillary_studies_p53
+Other_IHC_specify,
+MLH1_Promoter_Hypermethylation,Ancillary_studies_mlh1_promoter_hypermethylation
+Num_full_term_pregnancies,Donor_information_number_of_full_term_pregnancies
+EPIC_Bcells,
+EPIC_CAFs,
+EPIC_CD4_Tcells,
+EPIC_CD8_Tcells,
+EPIC_Endothelial,
+EPIC_Macrophages,
+EPIC_NKcells,
+EPIC_otherCells,
+CIBERSORT_B _cells _naive,Cibersort_B_cell_naive
+CIBERSORT_B _cells _memory,Cibersort_B_cell_memory
+CIBERSORT_Plasma _cells,Cibersort_B_cell_plasma
+CIBERSORT_T _cells _CD8,Cibersort_T_cell_CD8+
+CIBERSORT_T _cells _CD4 _naive,Cibersort_T_cell_CD4+_naive
+CIBERSORT_T _cells _CD4 _memory _resting,Cibersort_T_cell_CD4+_memory_resting
+CIBERSORT_T _cells _CD4 _memory _activated,Cibersort_T_cell_CD4+_memory_activated
+CIBERSORT_T _cells _follicular _helper,Cibersort_T_cell_follicular_helper
+CIBERSORT_T _cells _regulatory _(Tregs),Cibersort_T_cell_regulatory_(Tregs)
+CIBERSORT_T _cells _gamma _delta,Cibersort_T_cell_gamma_delta
+CIBERSORT_NK _cells _resting,Cibersort_NK_cell_resting
+CIBERSORT_NK _cells _activated,Cibersort_NK_cell_activated
+CIBERSORT_Monocytes,Cibersort_Monocyte
+CIBERSORT_Macrophages _M0,Cibersort_Macrophage_M0
+CIBERSORT_Macrophages _M1,Cibersort_Macrophage_M1
+CIBERSORT_Macrophages _M2,Cibersort_Macrophage_M2
+CIBERSORT_Dendritic _cells _resting,Cibersort_Myeloid_dendritic_cell_resting
+CIBERSORT_Dendritic _cells _activated,Cibersort_Myeloid_dendritic_cell_activated
+CIBERSORT_Mast _cells _resting,Cibersort_Mast_cell_resting
+CIBERSORT_Mast _cells _activated,Cibersort_Mast_cell_activated
+CIBERSORT_Eosinophils,Cibersort_Eosinophil
+CIBERSORT_Neutrophils,Cibersort_Neutrophil
+CIBERSORT_Absolute _score,
+ESTIMATE_StromalScore,Estimate_StromalScore
+ESTIMATE_ImmuneScore,Estimate_ImmuneScore
+ESTIMATE_ESTIMATEScore,Estimate_ESTIMATEScore
+Stemness_score,
+ER_ESR1,
+PR_PGR,
+Pathway_activity_EGFR,
+Pathway_activity_Hypoxia,
+Pathway_activity_JAK.STAT,
+Pathway_activity_MAPK,
+Pathway_activity_NFkB,
+Pathway_activity_PI3K,
+Pathway_activity_TGFb,
+Pathway_activity_TNFa,
+Pathway_activity_Trail,
+Pathway_activity_VEGF,
+Pathway_activity_p53,
+TP53_ATM,
+TP53_CHEK2,
+TP53_MDM4,
+TP53_RPS6KA3,
+TP53_TP53,
+TP53_pathway,
+PI3K_AKT1,
+PI3K_AKT2,
+PI3K_AKT3,
+PI3K_DEPDC5,
+PI3K_DEPTOR,
+PI3K_INPP4B,
+PI3K_MAPKAP1,
+PI3K_MLST8,
+PI3K_MTOR,
+PI3K_NPRL2,
+PI3K_NPRL3,
+PI3K_PDK1,
+PI3K_PIK3CA,
+PI3K_PIK3CB,
+PI3K_PIK3R1,
+PI3K_PIK3R2,
+PI3K_PPP2R1A,
+PI3K_PTEN,
+PI3K_RHEB,
+PI3K_RICTOR,
+PI3K_RPS6,
+PI3K_RPS6KB1,
+PI3K_RPTOR,
+PI3K_STK11,
+PI3K_TSC1,
+PI3K_TSC2,
+PI3K_pathway,
+HRD_BRCA1,
+HRD_BRCA2,
+HRD_BRCA1_or_BRCA2,
+CNV_DEL,
+CNV_AMP,
+CNV_class,CNV_status
+CNV_idx,
+CNV_1q_DEL,
+CNV_3q_DEL,
+CNV_4q_DEL,
+CNV_1q_AMP,
+CNV_3q_AMP,
+CNV_4q_AMP,
+Purity_Immune,
+Purity_Cancer,
+Purity_Stroma,
+MSI_status,MSI_status
+POLE_subtype,POLE
+JAK1_MS_INDEL,
+JAK1_Mutation,
+Log2_variant_per_Mbp,
+Log2_SNP_per_Mbp,
+Log2_INDEL_per_Mbp,
+Log2_variant_total,
+Log2_SNP_total,
+Log2_INDEL_total,
+Genomics_subtype,Genomic_subtype
+Mutation_signature_C>A,
+Mutation_signature_C>G,
+Mutation_signature_C>T,
+Mutation_signature_T>C,
+Mutation_signature_T>A,
+Mutation_signature_T>G,
+WXS_normal_sample_type,
+WXS_normal_filename,
+WXS_normal_UUID,
+WXS_tumor_sample_type,
+WXS_tumor_filename,
+WXS_tumor_UUID,
+WGS_normal_sample_type,
+WGS_normal_UUID,
+WGS_tumor_sample_type,
+WGS_tumor_UUID,
+RNAseq_R1_sample_type,
+RNAseq_R1_filename,
+RNAseq_R1_UUID,
+RNAseq_R2_sample_type,
+RNAseq_R2_filename,
+RNAseq_R2_UUID,
+miRNAseq_sample_type,
+miRNAseq_UUID,
+Methylation_available,
+Methylation_quality,
\ No newline at end of file
diff --git a/examples/datasets/Huang.csv b/examples/datasets/Huang.csv
new file mode 100644
index 00000000..15d54431
--- /dev/null
+++ b/examples/datasets/Huang.csv
@@ -0,0 +1,111 @@
+case_id,age,gender,country,smoking_history,smoke_age_start,smoke_age_stop,num_smoke_per_day,num_pack_years_sm,smoking_second_hand,smoking_inferred,alcohol_consum,num_yrs_alc_con,tumor_site_original,tumor_site_curated,tumor_focality,tumor_size_cm,histologic_type,histologic_grade,tumor_necrosis,patho_staging_pt,patho_staging_pn,clinic_staging_dist_metas,tumor_stage,P16,HPV_inference,NAT_pathology_review,tumor_pathology_review,ESTIMATE_stromal_score,ESTIMATE_immune_score,CD3_IHC_count,stemness_score,mutation_count,neoAntigen_count,chr_instability_idx,integrated_subtype,transcriptomic_subtype
+C3L-00977,56,Male,Russia,"Current reformed smoker, years unknown",,,20,,Yes,strong_evidence,Lifelong non-drinker,,Floor of mouth,Oral cavity,Unifocal,1.2,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT1,pN1,cM0,Stage III,Not Evaluated,NO,Soft Tissue No EPI%,SCC 80%,6825.995755,7989.115925,1.0,0.953243158,106,0,2.003653988054,Basal,Mesenchymal
+C3L-00987,61,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,18,,20,43.0,Yes,strong_evidence,"Consumed alcohol in the past, but currently a non-drinker",30,Tongue,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT2,pN1,cM0,Stage III,Not Evaluated,NO,Soft Tissue No EPI;Normal No EPI%,SCC 90%;SCC 90%;SCC 80%;SCC 70%,5999.793467,4772.409716,0.0,0.825329743,83,0,5.20561174255834,CIN,Classical
+C3L-00994,50,Male,Ukraine,Current reformed smoker within past 15 years,16,50,6,10.2,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,32,Tongue,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT2,pN0,cM0,Stage II,Not Evaluated,NO,Normal EPI 10%;Normal EPI 10%%,SCC 40%;SCC 70%;SCC 40%;SCC 75%,8924.036564,8176.233903,5.0,0.664581288,67,0,1.68447466240021,Immune,Mesenchymal
+C3L-00995,56,Male,Ukraine,"Current reformed smoker, more than 15 years",25,56,20,31.0,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,31,Buccal mucosa,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT2,pN1,cM0,Stage III,Not Evaluated,NO,Normal 15%;Normal 10%%,SCC 90%;SCC 80%;SCC 40%;SCC 70%,8723.429667,8342.246345,70.0,0.539918433,64,0,1.34048331636627,Immune,Mesenchymal
+C3L-00997,47,Male,Ukraine,"Current reformed smoker, more than 15 years",12,27,20,15.0,Yes,strong_evidence,Lifelong non-drinker,,Oropharynx,Oropharynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT2,pN1,cM0,Stage II,Not Evaluated,NO,Normal 10%%,SCC 70%;SCC 70%,7025.911695,7445.251991,60.0,0.843765491,129,0,3.90636995096076,CIN,Classical
+C3L-00999,56,Male,Russia,Current smoker: Includes daily and non-daily smokers,20,,20,36.0,Yes,weak_evidence,Lifelong non-drinker,,Floor of mouth,Oral cavity,Unifocal,2.2,"Squamous cell carcinoma, conventional",G1 Well differentiated,Present,pT2,pN0,cM0,Stage II,Not Evaluated,NO,Normal 10%%,SCC 80%;SCC 70%,8510.704551,8210.549555,30.0,0.548977169,159,0,1.1488341193973,Immune,Atypical
+C3L-01138,62,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,8,,20,54.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,6.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT4a,pN1,cM0,Stage IV,Not Evaluated,NO,Normal 10%%,SCC 70%,6283.423855,6407.893478,10.0,0.890789746,187,1,3.92498169394758,Basal,Classical
+C3L-01237,57,Male,Ukraine,"Current reformed smoker, more than 15 years",17,20,40,6.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN2,cM0,Stage IV,Not Evaluated,NO,Normal 10%%,SCC 80%,7175.667725,5720.287055,30.0,0.738328103,141,1,4.28648991074802,CIN,Classical
+C3L-02617,64,Male,Bulgaria,Current smoker: Includes daily and non-daily smokers,20,,30,66.0,Yes,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,20,Larynx,Larynx,Unifocal,6.0,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Present,pT2,pNX,Staging Incomplete,Stage II,Not Evaluated,NO,Normal 10%;Normal 15%;SCC 90%%,SCC 90%;SCC 80%;SCC 80%;SCC 80%;SCC 80%,6346.815584,6958.57339,,0.906466019,470,1,4.74481756440743,CIN,Classical
+C3L-02621,68,Male,Bulgaria,Current smoker: Includes daily and non-daily smokers,38,,30,45.0,Yes,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,20,Larynx,Larynx,Multifocal,0.6,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Present,pT1,pNX,Staging Incomplete,Stage I,Unknown,NO,Normal 10%;Normal 10%;Normal 10%%,SCC 70%;SCC 70%;SCC 70%;SCC 70%;SCC 65%,6806.384264,8039.787184,,0.754949822,358,0,1.77452097480929,CIN,Atypical
+C3L-02651,81,Male,Bulgaria,Current smoker: Includes daily and non-daily smokers,29,,30,78.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,2.5,Keratinizing Squamous Cell Carcinoma,G2 Moderately differentiated,Present,pT2 ,pNX,cMX,Stage II,Not Evaluated,NO,Normal 40%;Normal 20%;Normal 15%%,SCC 70%;SCC 70%;SCC 70%;SCC 70%;SCC 70%,7397.640049,8476.382275,,0.6098734,402,0,2.00013239240149,,Mesenchymal
+C3L-03378,54,Male,Russia,Current smoker: Includes daily and non-daily smokers,20,,20,34.0,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Oral cavity,Oral cavity,Unifocal,1.8,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pN2c,cM0,Stage IV,Not Evaluated,NO,Normal 15%%,SCC (PD) 70%,6878.201534,6924.419568,60.0,0.835631289,211,0,3.94351240948061,CIN,Classical
+C3L-04025,69,Male,Bulgaria,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,No or minimal exposure to secondhand smoke,weak_evidence,,,Buccal mucosa,Oral cavity,Unifocal,1.5,,G2 Moderately differentiated,Not identified,pT1b,pNX,,Stage I,,NO,SCC 70%;Normal 10%;Normal 30%%,SCC 70%;WDSCC 65%;WDSCC 70%;Verrucous Non-Invasive 40%;WDSCC 70%,7562.669184,7805.771365,,0.519337914,30,0,0.379356903896177,Immune,Mesenchymal
+C3L-04354,60,Male,Bulgaria,Current smoker: Includes daily and non-daily smokers,30,,6,9.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,2.0,Non-Keratinized Squamous Cell Carcinoma,G3 Poorly differentiated,Present,pT2 ,pNX,cMX,Stage II,Not Evaluated,NO,Normal 20%;Normal No EPI;Normal 5%%,SCC 75%;SCC 50%;SCC 55%;SCC 70%;SCC 75%,7228.703512,8055.848268,,0.618746239,180,0,3.70515533981302,CIN,Atypical
+C3L-04791,81,Male,Bulgaria,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,No or minimal exposure to secondhand smoke,weak_evidence,Lifelong non-drinker,,Larynx,Larynx,Unifocal,3.0,Keratinizing Squamous Cell Carcinoma,G2 Moderately differentiated,Present,pT1,pNX,cMX,Stage I,Not Evaluated,NO,Normal Thyroid;Normal Thyroid;Normal No EPI%,SCC 70%;SCC 75%;SCC 75%;SCC 70%;SCC 65%,6837.579096,7804.981646,,0.648402377,478,0,1.62512892038396,Immune,Mesenchymal
+C3L-04844,61,Female,TSS did not collect,Current smoker: Includes daily and non-daily smokers,21,,,,Exposure to secondhand smoke history not available,weak_evidence,,,Tonsil,Oropharynx,Unifocal,6.0,,G2 Moderately differentiated,Not identified,pT4a,pN3,,Stage IV,,NO,Normal 70$ EPI%,SCC 75%;SCC 70%,5714.862795,5923.495615,40.0,0.776764448,306,1,5.33285310739406,Basal,Classical
+C3L-04849,63,Male,TSS did not collect,Current smoker: Includes daily and non-daily smokers,18,,,,Exposure to secondhand smoke history not available,strong_evidence,,,Floor of mouth,Oral cavity,Unifocal,4.7,,G2 Moderately differentiated,Present,pT4a,pN3,,Stage IV,,NO,Hyperplasia 70%%,SCC 70%;SCC 75%,5757.546725,5841.490604,60.0,0.914700396,133,0,3.88309123002414,Basal,Basal
+C3N-00204,63,Female,Bulgaria,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,strong_evidence,Lifelong non-drinker,,Larynx,Larynx,Unifocal,3.2,Adenosquamous carcinoma,G3 Poorly differentiated,Not identified,pT3,pN1,cM0,Stage III,Not Evaluated,NO,Normal 15%;Normal 5%%,SCC 80%;PDSCC 80%;PDSCC 70%;PDSCC 75%,4141.503954,5229.928241,60.0,0.953230425,920,0,5.11464835682004,CIN,Atypical
+C3N-00295,53,Female,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,2.2,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pNX,cM0,Stage III,Negative,NO,Normal 40%%,SCC 40%;SCC 70%;SCC 65%;SCC 10%,4752.639319,4989.340444,,0.736752931,319,0,1.3612300289131,,Atypical
+C3N-00297,69,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Negative,NO,Normal no EPI%,SCC 80%;SCC 70%;SCC 80%;SCC 80%,5822.516855,6936.01723,,0.541769935,125,0,1.94171116940674,Basal,Basal
+C3N-00299,54,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption history not available,,Lip,Lip,Unifocal,3.1,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Negative,NO,Normal 0% EPI%,SCC 80%;SCC 80%;C3N-00299-23;SCC 65%,5342.360428,8009.515355,,0.848396164,1065,0,1.15797080609525,Basal,Basal
+C3N-00306,50,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN0,cM0,Stage III,Negative,NO,Normal 20% EPI%,SCC 50%;SCC 65%;SCC 65%;SCC 45%,6485.079143,7685.055639,,0.567681993,128,0,1.76612222075637,Immune,Atypical
+C3N-00307,70,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.0,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Not identified,pT4a,pNX,cM0,Stage IV,Negative,NO,Normal 40%%,SCC 70%;SCC 70%;SCC 70%;SCC 50%,7021.274977,7008.902307,,0.537980522,104,0,2.1452103279025,CIN,Mesenchymal
+C3N-00498,63,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Oral cavity,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN1,cM0,Stage III,Positive (>70% nuclear and cytoplasmic staining),NO,Normal 15%%,SCC 45%;SCC 75%;SCC 75%;SCC 60%,5288.872044,4377.867323,,0.790722164,168,0,4.00349897691078,CIN,Classical
+C3N-00519,67,Male,Poland,"Current reformed smoker, more than 15 years",27,67,20,40.0,Exposure to secondhand smoke history not available,weak_evidence,"Consumed alcohol in the past, but currently a non-drinker",5,Larynx,Larynx,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Negative,NO,Normal 15%%,SCC 70%;SCC 70%;SCC 70%;SCC 70%,7349.981429,8246.069477,,0.601852135,177,0,1.02047609395444,Immune,Atypical
+C3N-00822,53,Female,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN0,cM0,Stage III,Negative,NO,SCC 75%%,SCCPD 80%;SCC 75%,4080.16184,4864.02235,50.0,0.928850145,87,0,3.84953493276764,CIN,Classical
+C3N-00825,53,Male,Poland,Current smoker: Includes daily and non-daily smokers,30,,22,25.3,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption history not available,,Tongue,Oral cavity,Multifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Negative,NO,Muscle 0%%,SCC 55%;SCC 70%,8446.53344,8615.897442,90.0,0.709392947,55,0,1.35009015654592,Immune,Mesenchymal
+C3N-00828,67,Male,Poland,"Current reformed smoker, more than 15 years",16,66,20,50.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pNX,cM0,Stage II,Negative,NO,Normal 0%%,SCC 70%;SCC 70%;SCC 60%;SCC 40%,5428.79908,6486.859053,,0.736569263,326,0,2.37739360499887,CIN,Atypical
+C3N-00829,53,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Tongue,Oral cavity,Unifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN2b,cM0,Stage IV,Negative,NO,Normal 20%%,SCC 5%;SCC 10%;SCC 50%;SCC 45%,5642.230086,5786.945507,,0.785402648,173,0,3.85563750134117,CIN,Classical
+C3N-00846,67,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,37,,10,15.0,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,2.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT1,pNX,cM0,Stage I,Negative,NO,Normal 5%%,SCC 40%;SCC 45%;SCC 40%,8456.142513,9176.55781,,0.444508155,71,0,0.716663307509181,Immune,Mesenchymal
+C3N-00857,64,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN1,cM0,Stage IV,Not Evaluated,NO,Normal 25%%,SCC 60%;SCC 60%,5797.846142,4785.847019,1.0,0.862813515,175,0,7.02195799661206,CIN,Classical
+C3N-00871,58,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Oral cavity,Oral cavity,Unifocal,5.2,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 0%%,SCC 70%;SCC 75%,6278.855658,5085.878714,5.0,0.729252757,140,0,4.90384927675463,Basal,Classical
+C3N-01337,52,Male,Poland,Current smoker: Includes daily and non-daily smokers,30,,12,13.2,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,5.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT4,pN2b,cM0,Stage IV,Negative,NO,Normal 15%%,SCC 80%;SCC 75%,5650.929868,7269.184407,10.0,0.768039988,133,0,1.97256829789542,Basal,Atypical
+C3N-01338,50,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN1,cM0,Stage III,Negative,NO,Normal Cartilage%,SCC 75%;SCC 75%,6117.170351,6477.301118,20.0,0.982769201,265,0,2.74001252017641,Basal,Classical
+C3N-01339,68,Female,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.4,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Present,pT4,pN1,cM0,Stage IV,Not Evaluated,NO,SCC 70%%,SCC 75%;SCC 70%,2933.655697,3813.652536,10.0,0.623259258,234,1,7.29780757538092,CIN,Classical
+C3N-01340,72,Male,Poland,"Current reformed smoker, more than 15 years",Unknown,Unknown,Unknown,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,Normal 15%%,SCC 70%;SCC 70%,7441.043447,7793.863113,40.0,0.628752325,91,1,2.1182168893068,,Atypical
+C3N-01620,56,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,4.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN0,cM0,Stage III,Negative,NO,Normal 20%%,SCC 10%;SCC 60%,7332.918086,7128.615543,10.0,0.675434802,163,1,2.73819535791519,Basal,Mesenchymal
+C3N-01643,68,Male,Poland,"Current reformed smoker, more than 15 years",25,52,40,54.0,No or minimal exposure to secondhand smoke,weak_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,2.6,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN0,cM0,Stage III,Not Evaluated,NO,,WDSCC 70%;WDSCC 70%,6013.691797,5045.364024,30.0,0.556834589,5,0,,,Atypical
+C3N-01645,71,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN0,cM0,Stage III,Negative,NO,Normal 20%%,SCC 75%;SCC 70%,7001.081675,8716.71833,50.0,0.856966154,100,0,1.78347187054584,,Mesenchymal
+C3N-01752,73,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Floor of mouth,Oral cavity,Unifocal,5.6,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,,WDSCC 65%;WDSCC 70%,5441.75976,4353.059497,1.0,0.967646007,127,0,5.62117793084445,Basal,Basal
+C3N-01754,64,Male,Poland,Current reformed smoker within past 15 years,22,63,20,41.0,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,4.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT3,pN0,cM0,Stage III,Not Evaluated,NO,Normal 50%%,SCC 20%;SCC 70%;SCC 70%;SCC 40%,6759.178398,6021.776653,10.0,0.839918856,104,0,4.15894563931942,Basal,Basal
+C3N-01755,68,Male,Poland,"Current reformed smoker, years unknown",,,20,,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,5.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,PN1,cM0,Stage III,Not Evaluated,NO,,SCC 40%;SCC 75%;SCC 75%,6898.079644,6962.033648,20.0,0.68876914,110,0,4.51263623323375,CIN,Mesenchymal
+C3N-01756,62,Male,Poland,Current smoker: Includes daily and non-daily smokers,17,,10,22.5,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Hypopharynx,Unifocal,4.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN2c,cM0,Stage IV,Not Evaluated,NO,Normal 20%%,SCC 70%,5657.98446,4670.155568,20.0,0.878652596,100,1,4.95775000978713,CIN,Classical
+C3N-01757,64,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,2.7,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pNx,cM0,Stage II,Not Evaluated,NO,Normal 40%%,SCC 40%;Poor Quality 70%;Poor Quality 70%;SCC 70%,6179.545402,7014.534802,50.0,0.790684392,153,0,1.96924161753749,Basal,Basal
+C3N-01758,59,Male,Poland,Current reformed smoker within past 15 years,19,59,15,30.0,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pN1,cM0,Stage IV,Not Evaluated,NO,Normal 10%%,SCC 75%;SCC 70%;SCC 75%,8044.356402,7692.143579,60.0,0.606988683,71,0,2.49177658079283,Basal,Mesenchymal
+C3N-01858,62,Male,Poland,Current reformed smoker within past 15 years,21,62,30,61.5,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,15,Larynx,Larynx,Unifocal,3.2,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN3b,cM0,Stage IV,Not Evaluated,NO,Normal 20%%,SCC 60%;SCC 75%,6267.242797,7198.497771,50.0,0.901155495,1349,4,0.944560823285874,,Classical
+C3N-01859,67,Male,Poland,Current reformed smoker within past 15 years,17,57,20,40.0,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,3.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pN2c,cM0,Stage IV,Not Evaluated,NO,Normal 30%%,SCC 70%,6152.014132,8019.669666,60.0,0.817627441,239,1,4.16638343784538,CIN,Atypical
+C3N-01943,63,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,5.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN3a,cM0,Stage IV,Not Evaluated,NO,Normal 60%%,SCC 70%;PDSCC 75%,4803.753271,7256.040173,80.0,0.872400321,131,1,2.5047317619498,CIN,Atypical
+C3N-01944,65,Male,Poland,Current smoker: Includes daily and non-daily smokers,20,,20,45.0,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,3.5,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Present,pT4a,pN1,cM0,Stage IV,Not Evaluated,NO,Normal 10%%,SCC 50%,7719.467126,7379.857472,40.0,0.668452661,106,0,2.62992562733775,Basal,Mesenchymal
+C3N-01945,61,Male,Poland,"Current reformed smoker, more than 15 years",Unknown,Unknown,20,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN1,cM0,Stage III,Not Evaluated,NO,Normal 1%%,Verrucous Non-Invasive 75%;SCC 70%,6931.64948,6694.529778,30.0,0.801326397,96,0,2.04989486079862,CIN,Classical
+C3N-01946,64,Male,Poland,Current reformed smoker within past 15 years,Unknown,Unknown,Unknown,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Larynx,Larynx,Unifocal,1.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Not Evaluated,NO,Normal 60%%,SCC 90%,7320.801741,9805.370561,70.0,0.66500437,1273,3,1.27788803688656,Immune,Mesenchymal
+C3N-01947,49,Male,Poland,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Unifocal,5.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 50%%,SCC 90%,8369.540985,9086.657983,80.0,0.599903474,76,0,1.83309695660605,Immune,Mesenchymal
+C3N-01948,55,Female,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption history not available,,Tongue,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 5%%,SCC 65%;SCC 70%,7706.958045,9307.146827,80.0,0.629910425,66,0,0.915735252846995,Immune,Atypical
+C3N-02275,69,Male,Poland,Smoking history not available,,,,,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption history not available,,Larynx,Larynx,Unifocal,3.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,Normal 10%%,SCC 50%;SCC 70%,6171.891354,8593.664692,50.0,0.749940074,747,5,1.74267053678057,Basal,Mesenchymal
+C3N-02279,59,Male,Poland,Current reformed smoker within past 15 years,20,59,20,39.0,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT3,pN0,cM0,Stage III,Not Evaluated,NO,Normal 10%%,SCC 50%,6815.19623,7399.477079,70.0,0.835648725,180,0,2.60095072588305,Basal,Basal
+C3N-02333,61,Male,Ukraine,Current reformed smoker within past 15 years,Unknown,59,20,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pNX,cM0,Stage IV,Negative,NO,Normal 5%%,SCC 85%;SCC 85%;SCC 75%,7348.023786,9022.349073,50.0,0.526531556,345,0,0.623878944989433,Immune,Mesenchymal
+C3N-02693,42,Male,Poland,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,No or minimal exposure to secondhand smoke,weak_evidence,Lifelong non-drinker,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN0,cM0,Stage III,Not Evaluated,YES,Normal 40%%,SCC 85%,5143.846834,7273.096915,70.0,0.805646109,178,1,1.66689666383698,,Atypical
+C3N-02694,57,Male,Poland,"Current reformed smoker, years unknown",,,15,,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,2.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN2b,cM0,Stage IV,Not Evaluated,NO,,SCC 85%;SCC 50%,8945.001701,9361.656935,,0.515325924,80,0,1.44569881644837,Immune,Mesenchymal
+C3N-02695,45,Male,Poland,Current smoker: Includes daily and non-daily smokers,17,,12,16.8,No or minimal exposure to secondhand smoke,weak_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Multifocal,5.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT3,pN0,cM0,Stage III,Not Evaluated,NO,Normal 5%%,SCC 80%;SCC 80%,5937.036833,7022.660147,,0.755947066,87,0,2.04458209841523,Basal,Basal
+C3N-02700,80,Female,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Lip,Lip,Unifocal,3.5,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT2,pNX,cM0,Stage II,Positive (>70% nuclear and cytoplasmic staining),NO,Normal 5%%,SCC 80%;SCC 70%;SCC 80%,5691.04835,8128.32296,40.0,0.764721737,1372,2,2.23034641623138,Basal,Basal
+C3N-02713,64,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,19,,20,45.0,Exposure to secondhand smoke history not available,strong_evidence,Lifelong non-drinker,,Larynx,Larynx,Unifocal,3.3,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Present,pT3,pN2,cM0,Stage IV,Not Evaluated,NO,Normal 10%%,SCC 70%;SCC 80%;PDSCC 80%,5396.144947,5220.095945,60.0,0.797822192,638,0,5.22744392960231,CIN,Classical
+C3N-02714,23,Male,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Tongue,Oral cavity,Unifocal,2.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT2,pNX,cM0,Stage II,Positive (>70% nuclear and cytoplasmic staining),NO,Normal 15%%,PDSCC 70%;SCC 25%;SCC 75%,7581.433954,8894.705175,80.0,0.612150111,54,0,0.969092228014795,Immune,Mesenchymal
+C3N-02716,76,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,Unknown,,Unknown,,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Lip,Lip,Unifocal,1.2,"Squamous cell carcinoma, conventional",G1 Well differentiated,Present,pT1,pNX,cM0,Stage I,Positive (>70% nuclear and cytoplasmic staining),NO,,SCC 70%;SCC 45%,7083.32021,7778.97507,40.0,0.600350747,1270,1,0.105650080165848,Immune,Atypical
+C3N-02727,49,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,Unknown,,20,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,3.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pNX,cM0,Stage IV,Negative,NO,Normal Cartilage 0%%,SCC 60%;SCC 70%;SCC 90%,8801.307424,9307.036194,60.0,0.471334213,33,0,1.15493404518317,Immune,Mesenchymal
+C3N-02730,40,Male,China,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Tongue,Oral cavity,Unifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT1,pN1,cM0,Stage III,Not Evaluated,NO,Normal 5%%,SCC 75%;SCC 75%,8943.570062,9868.225196,,0.55895911,18,1,0.839975841114401,Immune,Mesenchymal
+C3N-02925,68,Female,China,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Tongue,Oral cavity,Unifocal,4.2,Keratinizing squamous cell carcinoma,G2 Moderately differentiated,Not identified,pT3,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 0%%,SCC 85%;SCC 70%,7089.288448,9904.398057,,0.645009581,36,0,1.27973286026081,Immune,Basal
+C3N-03008,65,Female,Poland,Current smoker: Includes daily and non-daily smokers,20,,10,22.5,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,5,Floor of mouth,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN2c,cM0,Stage IV,Not Evaluated,NO,Normal 40%%,SCC 70%,7532.452338,6864.752029,5.0,0.809266623,156,0,3.78656476415432,Basal,Mesenchymal
+C3N-03009,58,Male,Poland,Current smoker: Includes daily and non-daily smokers,16,,15,31.5,Yes,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,Unknown,Floor of mouth,Oral cavity,Unifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT1,pN0,cM0,Stage I,Not Evaluated,NO,Normal 35%%,SCC 75%;SCC sub 5%,6797.453417,5131.34755,60.0,0.836561377,129,0,5.10284989127249,CIN,Classical
+C3N-03011,62,Male,Poland,Current reformed smoker within past 15 years,22,60,20,38.0,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,2.0,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Not identified,pT2,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 35%%,SCC 80%;SCC 70%;SCC 75%,5274.918068,6830.57982,40.0,0.831950566,67,0,5.05942043387248,CIN,Atypical
+C3N-03012,71,Male,Poland,"Current reformed smoker, years unknown",,,20,,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN0,cM0,Stage IV,Unknown,NO,Normal 35%%,Normal 35%,6716.016019,4492.50941,0.0,0.83083229,343,0,6.12072466423461,CIN,Classical
+C3N-03013,59,Male,Poland,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pNx,cM0,Stage II,Not Evaluated,NO,Normal 10%%,SCC 65%,8855.514614,8805.211906,50.0,0.526153678,67,0,0.965558835675723,Immune,Atypical
+C3N-03015,61,Male,Poland,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Unifocal,1.0,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Not identified,pT4a,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 10%%,SCC 75%,7041.91195,8271.449708,70.0,0.898778348,192,0,1.79904050588596,Basal,Atypical
+C3N-03027,65,Male,Poland,Current smoker: Includes daily and non-daily smokers,17,,20,48.0,No or minimal exposure to secondhand smoke,weak_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,8,Floor of mouth,Oral cavity,Unifocal,1.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pNx,cM0,Stage II,Not Evaluated,NO,Normal 35%%,SCC 75%,5566.75901,7101.798643,80.0,0.808283426,118,0,2.7927541433259,CIN,Atypical
+C3N-03028,59,Male,Poland,Current smoker: Includes daily and non-daily smokers,20,,20,39.0,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,Unknown,Floor of mouth,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 35%%,SCC 80%,7353.736545,7831.873692,30.0,0.79897701,95,0,3.72064406197502,Basal,Mesenchymal
+C3N-03042,67,Female,United States,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,,,Oral cavity,Oral cavity,Unifocal,5.2,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,T4a,N3b,M0,Stage IV,Negative,NO,SCC 30%;SCC 20%%,SCC 80%;SCC 75%;SCC 75%;SCC 75%,6460.922624,5907.724612,40.0,0.822779851,98,0,2.52459111000182,Basal,Mesenchymal
+C3N-03045,77,Male,United States,Smoking history not available,,,,,Exposure to secondhand smoke history not available,strong_evidence,,,Tongue,Oral cavity,Unifocal,2.2,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,T2,N1,m0,Stage III,Negative,NO,Normal 10%%,SCC 75%,4932.8686,6125.854309,10.0,0.919189035,123,0,3.49683744220641,Basal,Basal
+C3N-03226,54,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,19,,20,35.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN2b,cM0,Stage IV,Not Evaluated,NO,Normal 5%%,SCC 80%;SCC 70%;SCC 70%,7725.028018,8410.581778,50.0,0.670113161,41,0,0.773082844406719,Immune,Atypical
+C3N-03433,76,Male,Poland,Current smoker: Includes daily and non-daily smokers,22,,15,40.5,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,2.4,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Not Evaluated,NO,Normal%,SCC 7,6426.336016,7166.765405,60.0,0.755758699,459,0,4.44642896396794,CIN,Classical
+C3N-03456,64,Male,Poland,Current smoker: Includes daily and non-daily smokers,23,,20,41.0,Yes,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,Unknown,Floor of mouth,Oral cavity,Unifocal,7.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4,pN0,cM0,Stage IV,Not Evaluated,NO,,SCC 6,6439.29269,5545.333423,50.0,0.809361582,92,0,4.62246547991071,CIN,Classical
+C3N-03457,71,Male,Poland,Current reformed smoker within past 15 years,22,71,15,36.8,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Unifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Not Evaluated,NO,Normal%,SCC 70,9627.987701,9020.741798,30.0,0.468432263,236,0,1.66045021314332,Immune,Mesenchymal
+C3N-03458,70,Male,Poland,Current smoker: Includes daily and non-daily smokers,25,,,,Yes,weak_evidence,,,Floor of mouth,Oral cavity,Unifocal,2.0,,G1 Well differentiated,Not identified,pT2,pN0,,Stage II,,NO,Normal%,SCC 65,6027.079084,6746.609023,20.0,0.896479674,124,1,2.48177344048731,Basal,Atypical
+C3N-03487,53,Female,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Alveolar ridge,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,Normal%,SCC 8;SCC 80%;SCC 80%,8134.452066,9299.472289,90.0,0.577214475,81,0,0.935948972579417,Immune,Mesenchymal
+C3N-03488,64,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,19,,20,45.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Other,Hypopharynx,Unifocal,5.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT3,pNX,cM0,Stage III,Not Evaluated,NO,Normal 20%;SCC 75%%,SCC 80%;SCC 35%,7020.822857,5614.264427,50.0,0.862958537,113,0,5.16260413236008,Basal,Mesenchymal
+C3N-03490,64,Male,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,strong_evidence,Lifelong non-drinker,,Base of tongue,Oropharynx,Unifocal,4.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pN0,cM0,Stage III,Not Evaluated,NO,N 10%%,SCC 70%;SCC 75%;SCC 80%,6984.498413,6145.297042,30.0,0.816379845,136,1,5.88394907689529,CIN,Classical
+C3N-03612,51,Male,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Oropharynx,Oropharynx,Unifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT3,pNX,cM0,Stage III,Not Evaluated,NO,N 10%%,PDSCC 15%;SCC 35%;SCC 65%,5983.808904,10678.10984,60.0,0.442356796,1,0,0.0997933869043518,Immune,Atypical
+C3N-03619,59,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,24,,20,35.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT2,pN0,cM0,Stage II,Not Evaluated,NO,N 5%%,SCC 60%;SCC 25%;SCC 70%,7000.525344,7267.440427,50.0,0.723910132,90,0,2.60534793715202,,Atypical
+C3N-03620,61,Male,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Tongue,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pNX,cM0,Stage II,Not Evaluated,NO,N 5%%,SCC 70%;PDSCC 10;SCC 10%,8784.023801,9562.408984,90.0,0.495209781,14,0,0.852501028951453,Immune,Atypical
+C3N-03664,58,Male,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,strong_evidence,Lifelong non-drinker,,Floor of mouth,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN2c,cM0,Stage IV,Not Evaluated,NO,N 5%%,SCC 65%;SCC 80%;SCC 75%,6235.504976,5794.499567,20.0,0.942451279,165,1,5.70162561583804,Basal,Atypical
+C3N-03781,68,Male,Poland,"Current reformed smoker, more than 15 years",28,50,20,22.0,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,N 20%%,SCC 60%,7985.804043,7107.47205,70.0,0.659516899,193,0,2.43825988238678,,Mesenchymal
+C3N-03782,50,Female,Poland,Current smoker: Includes daily and non-daily smokers,20,,20,30.0,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,N 10%%,SCC 65%,5188.792619,4993.8734,50.0,0.862910851,95,0,4.84870972785454,Basal,Atypical
+C3N-03783,67,Male,Poland,Current smoker: Includes daily and non-daily smokers,17,,20,50.0,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Alveolar ridge,Oral cavity,Unifocal,2.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT2,pN2b,cM0,Stage IV,Not Evaluated,NO,N lymphoid B%,SCC 70%,8012.368622,8451.386202,80.0,0.747829853,123,1,2.66572270641339,,Mesenchymal
+C3N-03785,66,Male,Poland,Current smoker: Includes daily and non-daily smokers,20,,4,9.2,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Tongue,Oral cavity,Multifocal,1.2,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT1,pNx,cM0,Stage I,Not Evaluated,NO,N 3%%,SCC(PD) 60,3037.218187,4178.197713,1.0,0.892066902,153,0,7.40131209605744,CIN,Atypical
+C3N-03837,54,Male,China,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Lip,Lip,Unifocal,4.5,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT3,pN0,cM0,Stage III,Not Evaluated,NO,N 35%%,SCC 80%;SCC 80%,6229.92831,8003.79671,,0.720749789,459,0,0.701179286406038,,Basal
+C3N-03841,59,Male,China,Current smoker: Includes daily and non-daily smokers,25,,20,34.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,30,Other,Larynx,Unifocal,3.0,"Squamous cell carcinoma, conventional",G3 Poorly differentiated,Not identified,pT3,pN1,cM0,Stage III,Not Evaluated,NO,N 15%%,SCC 75%;SCC 75%,7057.917607,9004.291325,,0.684330926,96,0,3.98363706191443,CIN,Mesenchymal
+C3N-03849,55,Male,China,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Larynx,Larynx,Unifocal,7.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT4a,pN2b,cM0,Stage IV,Not Evaluated,NO,N 15%%,PDCasub 70%;SCC 25%,6188.218272,6250.864705,,0.861893118,118,0,3.41718750957631,CIN,Atypical
+C3N-03876,65,Male,China,"Current reformed smoker, more than 15 years",22,44,20,22.0,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption more than 2 drinks per day for men and more than 1 drink per day for women,20,Larynx,Larynx,Unifocal,2.5,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT3,pN2b,cM0,Stage IV,Unknown,NO,N 15%%,PDCa 70%;SCC 80%,5555.585958,5729.003577,,1.0,151,0,5.49628737944634,Basal,Classical
+C3N-03878,52,Male,China,Current smoker: Includes daily and non-daily smokers,32,,10,10.0,Exposure to secondhand smoke history not available,strong_evidence,Lifelong non-drinker,,Larynx,Larynx,Unifocal,2.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT3,pN1,cM0,Stage III,Not Evaluated,NO,N 10%%,SCC 70%;SCC 80%,6917.552525,6868.706676,,0.65847816,80,0,3.00168256433835,CIN,Classical
+C3N-03888,58,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,18,,20,40.0,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,2.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT3,pNX,cM0,Stage III,Not Evaluated,NO,N 20%%,SCC 30%;PDCa 70%;SCC 40%,4789.748963,6802.39559,30.0,0.857808178,125,0,4.02272702037636,CIN,Atypical
+C3N-03889,53,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,23,,20,30.0,Exposure to secondhand smoke history not available,weak_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Alveolar ridge,Oral cavity,Unifocal,4.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,N 10%%,SCC 75%;SCC 80%;SCC 70%,7941.455222,7219.722156,20.0,0.596663601,91,0,2.43824716654285,Immune,Mesenchymal
+C3N-03928,60,Female,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Tongue,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Present,pT2,pN1,cM0,Stage III,Not Evaluated,NO,N 15%%,SCC 70%;SCC 40%;SCC 70%,7661.031806,9098.218537,50.0,0.625748343,61,0,1.84477084641962,Immune,Basal
+C3N-03933,70,Female,Ukraine,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,weak_evidence,Lifelong non-drinker,,Floor of mouth,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT2,pN1,cM0,Stage III,Not Evaluated,NO,N 15%%,SCC 50%;SCC 70%;SCC 75%,7146.623165,7900.354223,80.0,0.689933031,87,1,5.48543676505158,Basal,Basal
+C3N-04152,54,Male,Armenia,Lifelong non-smoker: Less than 100 cigarettes smoked in lifetime,,,,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Oropharynx,Oropharynx,Unifocal,4.5,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT4a,pN2b,cM0,Stage IV,Not Evaluated,NO,N 5%%,SCC PD 60%;PDSCC 75%;PDC 70%,5153.497308,4863.601322,70.0,0.94586735,80,0,6.51261649605658,CIN,Atypical
+C3N-04273,73,Male,Poland,Current reformed smoker within past 15 years,30,68,20,38.0,No or minimal exposure to secondhand smoke,strong_evidence,"Consumed alcohol in the past, but currently a non-drinker",Unknown,Oropharynx,Oropharynx,Unifocal,2.0,Basaloid squamous cell carcinoma,G2 Moderately differentiated,Present,pT2,pN2b,cM0,Stage IV,Not Evaluated,NO,N 0%%,PDCa 75%;PDCa 65%,4786.757912,4495.994769,40.0,0.746277202,111,0,7.02321458831796,CIN,Atypical
+C3N-04275,61,Male,Poland,Current smoker: Includes daily and non-daily smokers,20,,20,41.0,Yes,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,2.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT2,pN0,cM0,Stage II,Not Evaluated,NO,N 35%%,SCC 70%,6635.873278,8210.346626,50.0,0.451905628,82,0,1.15390024252283,Immune,Atypical
+C3N-04276,68,Male,Poland,Current reformed smoker within past 15 years,16,65,15,36.8,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,5.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT4a,pN0,cM0,Stage IV,Not Evaluated,NO,N 10%%,PDCa 70%,6821.83286,5143.929607,5.0,0.51276159,165,0,5.19925706608648,CIN,Mesenchymal
+C3N-04277,72,Male,Poland,Current smoker: Includes daily and non-daily smokers,25,,10,23.5,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,1.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT3,pNx,cM0,Stage III,Not Evaluated,NO,N 30%%,SCC 75%,7772.142353,8589.873314,70.0,0.57517292,112,1,1.44584918655384,Immune,Mesenchymal
+C3N-04278,71,Male,Poland,Current reformed smoker within past 15 years,30,68,15,28.5,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,4.0,Basaloid squamous cell carcinoma,G2 Moderately differentiated,Not identified,pT3,pNx,cM0,Stage III,Not Evaluated,NO,N 25%%,PD 70%,6713.95651,7074.622965,90.0,0.831636557,289,0,4.49836532312063,CIN,Classical
+C3N-04279,65,Male,Poland,Current smoker: Includes daily and non-daily smokers,20,,15,33.8,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Floor of mouth,Oral cavity,Unifocal,3.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Not identified,pT2,pN0,cM0,Stage II,Not Evaluated,NO,N 20%%,PDCa 70%,4913.19811,5469.762929,20.0,0.862528998,183,0,5.76658394099771,Basal,Classical
+C3N-04280,65,Male,Poland,Current reformed smoker within past 15 years,18,28,20,10.0,No or minimal exposure to secondhand smoke,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,4.0,"Squamous cell carcinoma, conventional",G1 Well differentiated,Not identified,pT2,pN0,cM0,Stage II,Not Evaluated,NO,,Veracan 80%,5615.887039,5361.231467,50.0,0.694324729,77,0,1.66514548259798,Basal,Basal
+C3N-04611,55,Male,Ukraine,Current smoker: Includes daily and non-daily smokers,Unknown,,20,,Exposure to secondhand smoke history not available,strong_evidence,Alcohol consumption equal to or less than 2 drinks per day for men and 1 drink or less per day for women,,Larynx,Larynx,Unifocal,6.0,"Squamous cell carcinoma, conventional",G2 Moderately differentiated,Present,pT3,pNX,cM0,Stage III,Not Evaluated,NO,N 5%%,PDCa 70%;PDCa 70%;PDCa 80%,8445.538834,8745.826501,90.0,0.680310507,107,0,1.7013063766819,Immune,Mesenchymal
diff --git a/examples/demo-ground-truth.ipynb b/examples/demo-ground-truth.ipynb
new file mode 100644
index 00000000..626695b5
--- /dev/null
+++ b/examples/demo-ground-truth.ipynb
@@ -0,0 +1,3314 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.4.3'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = false;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'tabulator': 'https://cdn.jsdelivr.net/npm/tabulator-tables@5.5.0/dist/js/tabulator.min', 'moment': 'https://cdn.jsdelivr.net/npm/luxon/build/global/luxon.min'}, 'shim': {}});\n require([\"tabulator\"], function(Tabulator) {\n\twindow.Tabulator = Tabulator\n\ton_load()\n })\n 
require([\"moment\"], function(moment) {\n\twindow.moment = moment\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 2;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window.Tabulator !== undefined) && (!(window.Tabulator instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.4.5/dist/bundled/datatabulator/tabulator-tables@5.5.0/dist/js/tabulator.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window.moment !== undefined) && (!(window.moment instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.4.5/dist/bundled/datatabulator/luxon/build/global/luxon.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element 
= document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.holoviz.org/panel/1.4.5/dist/bundled/datatabulator/tabulator-tables@5.5.0/dist/js/tabulator.min.js\", \"https://cdn.holoviz.org/panel/1.4.5/dist/bundled/datatabulator/luxon/build/global/luxon.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.3.min.js\", 
\"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.3.min.js\", \"https://cdn.holoviz.org/panel/1.4.5/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [\"https://cdn.holoviz.org/panel/1.4.5/dist/bundled/datatabulator/tabulator-tables@5.5.0/dist/css/tabulator_simple.min.css?v=1.4.5\"];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks 
= undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));",
+ "application/vnd.holoviews_load.v0+json": ""
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in 
window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = 
toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", 
\"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n 
}\n } catch(err) {\n }\n}\n",
+ "application/vnd.holoviews_load.v0+json": ""
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.holoviews_exec.v0+json": "",
+ "text/html": [
+ "
\n",
+ ""
+ ]
+ },
+ "metadata": {
+ "application/vnd.holoviews_exec.v0+json": {
+ "id": "3f8fdd9c-53b4-4204-9ce7-100e5a7d3b23"
+ }
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import bdikit as bdi\n",
+ "import pandas as pd\n",
+ "from IPython.display import display, Markdown\n",
+ "pd.set_option('display.max_rows', None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
# Read the three CSV inputs used by the rest of the notebook: the source
# table, the target table, and the source->target ground-truth mapping.
df_source, df_target, df_gt = [
    pd.read_csv(csv_path)
    for csv_path in (
        "./datasets/Dou-ucec-discovery.csv",
        "./datasets/Dou-ucec-confirmatory.csv",
        "./datasets/Dou-ucec-ground-truth.csv",
    )
]
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
# Keep only ground-truth rows that actually name a target column, then build
# a {source column -> target string} lookup from them.
df_ground_truth = df_gt[df_gt['target'].notna()]
gt_set = dict(zip(df_ground_truth['source'], df_ground_truth['target']))
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
def accuracy(gt_set, schema_mapping):
    """Score a predicted schema mapping against the ground truth.

    Only source columns that appear in ``gt_set`` are scored; every other
    predicted row is ignored. Each mismatch is printed as an ``ER:`` line so
    the failures can be inspected in the cell output.

    Args:
        gt_set: dict mapping a source column name to its accepted target
            column(s). Several accepted targets are joined with ",".
        schema_mapping: DataFrame with ``source`` and ``target`` columns
            holding the predicted match for each source column.

    Returns:
        Fraction (0.0 to 1.0) of scored source columns whose predicted target
        is one of the accepted targets. Returns 0.0 when none of the
        predicted source columns appears in ``gt_set``.
    """
    schema_dict = schema_mapping.set_index('source')['target'].to_dict()
    correct_count = 0
    total = 0
    for source_column, target_column in schema_dict.items():
        if source_column not in gt_set:
            continue
        total += 1
        correct_target_columns = set(gt_set[source_column].split(","))
        if target_column in correct_target_columns:
            correct_count += 1
        else:
            print(f"ER: {source_column} -> out={target_column} gt={correct_target_columns}")
    # Nothing to score: avoid ZeroDivisionError and report no correct matches.
    if total == 0:
        return 0.0
    return correct_count / total
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
# Drop every space character from the source column names.
df_source.columns = [column_name.replace(' ', '') for column_name in df_source.columns]
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 179 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8a055f6f3aac4175b83cda61c3bd7ea6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/179 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 213 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9761eef33a4547ffa457c7c0e1fcc6d8",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/213 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ER: Proteomics_Participant_ID -> out=Idx gt={'Case_id'}\n",
+ "ER: Proteomics_TMT_batch -> out=Metformin_treatment gt={'Batch'}\n",
+ "ER: Proteomics_TMT_plex -> out=Number_of_para-aortic_lymph_nodes_positive_for_tumor_by_he gt={'Plex'}\n",
+ "ER: Proteomics_TMT_channel -> out=Number_of_para-aortic_lymph_nodes_positive_for_tumor_by_he gt={'ReporterName'}\n",
+ "ER: Treatment_naive -> out=Follow-up_additional_treatment_radiation_therapy_for_new_tumor gt={'Cancer_history_history_of_any_treatment'}\n",
+ "ER: Tumor_purity -> out=Tumor_necrosis gt={'ABSOLUTE_tumor_purity'}\n",
+ "ER: MSH6 -> out=Ancillary_studies_msh2 gt={'Ancillary_studies_msh6'}\n",
+ "ER: CIBERSORT_Monocytes -> out=Cibersort_Macrophage_M2 gt={'Cibersort_Monocyte'}\n",
+ "ER: CIBERSORT_Eosinophils -> out=xCell_Cancer_associated_fibroblast gt={'Cibersort_Eosinophil'}\n",
+ "ER: CIBERSORT_Neutrophils -> out=CNV_ratio gt={'Cibersort_Neutrophil'}\n",
+ "ER: ESTIMATE_ImmuneScore -> out=Estimate_ESTIMATEScore gt={'Estimate_ImmuneScore'}\n",
+ "Recall: 75.556\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " idx | \n",
+ " xCell_T_cell_CD4+_Th1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Idx | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Case_excluded | \n",
+ " Case_excluded | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Proteomics_TMT_batch | \n",
+ " Metformin_treatment | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Proteomics_TMT_plex | \n",
+ " Number_of_para-aortic_lymph_nodes_positive_for... | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Proteomics_TMT_channel | \n",
+ " Number_of_para-aortic_lymph_nodes_positive_for... | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Proteomics_Parent_Sample_IDs | \n",
+ " Idx | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Proteomics_Aliquot_ID | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Proteomics_Tumor_Normal | \n",
+ " Group | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Proteomics_OCT | \n",
+ " POLE | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Country | \n",
+ " Participant_country | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Histologic_Grade_FIGO | \n",
+ " Histologic_grade | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Myometrial_invasion_Specify | \n",
+ " Myometrial_invasion_present_specify | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Histologic_type | \n",
+ " Histologic_Type | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Treatment_naive | \n",
+ " Follow-up_additional_treatment_radiation_thera... | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Tumor_purity | \n",
+ " Tumor_necrosis | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Clin_Stage_Dist_Mets-cM | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Path_Stage_Dist_Mets-pM | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " tumor_Stage-Pathological | \n",
+ " Tumor_stage_pathological | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " FIGO_stage | \n",
+ " Tumor_stage_pathological | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " LVSI | \n",
+ " Number_of_other_lymph_nodes_examined | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " BMI | \n",
+ " BMI | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Age | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Diabetes | \n",
+ " Diabetes | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Race | \n",
+ " Race | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Ethnicity | \n",
+ " Ethnicity | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Gender | \n",
+ " Sex | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Tumor_Site | \n",
+ " Tumor_site | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Tumor_Site_Other | \n",
+ " Tumor_site_other | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Tumor_Focality | \n",
+ " Tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Tumor_Size_cm | \n",
+ " Tumor_size_cm | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Estrogen_Receptor | \n",
+ " Ancillary_studies_estrogen_receptor | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Estrogen_Receptor_% | \n",
+ " Ancillary_studies_estrogen_receptor | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " Progesterone_Receptor | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Progesterone_Receptor_% | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " MLH1 | \n",
+ " Ancillary_studies_mlh1 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " MLH2 | \n",
+ " Ancillary_studies_mlh1 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " MSH6 | \n",
+ " Ancillary_studies_msh2 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " PMS2 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " p53 | \n",
+ " Ancillary_studies_p53 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " Other_IHC_specify | \n",
+ " Ancillary_studies_other_immunohistochemistry_t... | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " MLH1_Promoter_Hypermethylation | \n",
+ " Ancillary_studies_mlh1_promoter_hypermethylation | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " Num_full_term_pregnancies | \n",
+ " Donor_information_number_of_full_term_pregnancies | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " EPIC_Bcells | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " EPIC_CAFs | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " EPIC_CD4_Tcells | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " EPIC_CD8_Tcells | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " EPIC_Endothelial | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " EPIC_Macrophages | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " EPIC_NKcells | \n",
+ " xCell_NK_cell | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " EPIC_otherCells | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " CIBERSORT_B_cells_naive | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " CIBERSORT_B_cells_memory | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " CIBERSORT_Plasma_cells | \n",
+ " Progeny_PI3K | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " CIBERSORT_T_cells_CD8 | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " CIBERSORT_T_cells_CD4_naive | \n",
+ " Cibersort_T_cell_CD4+_naive | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " CIBERSORT_T_cells_CD4_memory_resting | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " CIBERSORT_T_cells_CD4_memory_activated | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " CIBERSORT_T_cells_follicular_helper | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " CIBERSORT_T_cells_regulatory_(Tregs) | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " CIBERSORT_T_cells_gamma_delta | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " CIBERSORT_NK_cells_resting | \n",
+ " Cibersort_NK_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " CIBERSORT_NK_cells_activated | \n",
+ " Cibersort_NK_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " CIBERSORT_Monocytes | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " CIBERSORT_Macrophages_M0 | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " CIBERSORT_Macrophages_M1 | \n",
+ " Cibersort_Macrophage_M1 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " CIBERSORT_Macrophages_M2 | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " CIBERSORT_Dendritic_cells_resting | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " CIBERSORT_Dendritic_cells_activated | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 71 | \n",
+ " CIBERSORT_Mast_cells_resting | \n",
+ " Cibersort_Mast_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " CIBERSORT_Mast_cells_activated | \n",
+ " Cibersort_Mast_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " CIBERSORT_Eosinophils | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " CIBERSORT_Neutrophils | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " CIBERSORT_Absolute_score | \n",
+ " Cibersort_T_cell_follicular_helper | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " ESTIMATE_StromalScore | \n",
+ " Estimate_StromalScore | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " ESTIMATE_ImmuneScore | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " ESTIMATE_ESTIMATEScore | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " Stemness_score | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " ER_ESR1 | \n",
+ " xCell_stroma_score | \n",
+ "
\n",
+ " \n",
+ " 81 | \n",
+ " PR_PGR | \n",
+ " Progeny_EGFR | \n",
+ "
\n",
+ " \n",
+ " 82 | \n",
+ " Pathway_activity_EGFR | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 83 | \n",
+ " Pathway_activity_Hypoxia | \n",
+ " xCell_immune_score | \n",
+ "
\n",
+ " \n",
+ " 84 | \n",
+ " Pathway_activity_JAK.STAT | \n",
+ " Mutation_signature_SBS5 | \n",
+ "
\n",
+ " \n",
+ " 85 | \n",
+ " Pathway_activity_MAPK | \n",
+ " Progeny_MAPK | \n",
+ "
\n",
+ " \n",
+ " 86 | \n",
+ " Pathway_activity_NFkB | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 87 | \n",
+ " Pathway_activity_PI3K | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " Pathway_activity_TGFb | \n",
+ " ABSOLUTE_tumor_purity | \n",
+ "
\n",
+ " \n",
+ " 89 | \n",
+ " Pathway_activity_TNFa | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 90 | \n",
+ " Pathway_activity_Trail | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 91 | \n",
+ " Pathway_activity_VEGF | \n",
+ " Progeny_VEGF | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " Pathway_activity_p53 | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 93 | \n",
+ " TP53_ATM | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 94 | \n",
+ " TP53_CHEK2 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " TP53_MDM4 | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " TP53_RPS6KA3 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " TP53_TP53 | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " TP53_pathway | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " PI3K_AKT1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " PI3K_AKT2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " PI3K_AKT3 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " PI3K_DEPDC5 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " PI3K_DEPTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 104 | \n",
+ " PI3K_INPP4B | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 105 | \n",
+ " PI3K_MAPKAP1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 106 | \n",
+ " PI3K_MLST8 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 107 | \n",
+ " PI3K_MTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 108 | \n",
+ " PI3K_NPRL2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 109 | \n",
+ " PI3K_NPRL3 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 110 | \n",
+ " PI3K_PDK1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 111 | \n",
+ " PI3K_PIK3CA | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 112 | \n",
+ " PI3K_PIK3CB | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 113 | \n",
+ " PI3K_PIK3R1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 114 | \n",
+ " PI3K_PIK3R2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 115 | \n",
+ " PI3K_PPP2R1A | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 116 | \n",
+ " PI3K_PTEN | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 117 | \n",
+ " PI3K_RHEB | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 118 | \n",
+ " PI3K_RICTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 119 | \n",
+ " PI3K_RPS6 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 120 | \n",
+ " PI3K_RPS6KB1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 121 | \n",
+ " PI3K_RPTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 122 | \n",
+ " PI3K_STK11 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 123 | \n",
+ " PI3K_TSC1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 124 | \n",
+ " PI3K_TSC2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 125 | \n",
+ " PI3K_pathway | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 126 | \n",
+ " HRD_BRCA1 | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 127 | \n",
+ " HRD_BRCA2 | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 128 | \n",
+ " HRD_BRCA1_or_BRCA2 | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 129 | \n",
+ " CNV_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 130 | \n",
+ " CNV_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 131 | \n",
+ " CNV_class | \n",
+ " CNV_status | \n",
+ "
\n",
+ " \n",
+ " 132 | \n",
+ " CNV_idx | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 133 | \n",
+ " CNV_1q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 134 | \n",
+ " CNV_3q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 135 | \n",
+ " CNV_4q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " CNV_1q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 137 | \n",
+ " CNV_3q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 138 | \n",
+ " CNV_4q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 139 | \n",
+ " Purity_Immune | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 140 | \n",
+ " Purity_Cancer | \n",
+ " Progeny_Hypoxia | \n",
+ "
\n",
+ " \n",
+ " 141 | \n",
+ " Purity_Stroma | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 142 | \n",
+ " MSI_status | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ " 143 | \n",
+ " POLE_subtype | \n",
+ " POLE | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " JAK1_MS_INDEL | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ " 145 | \n",
+ " JAK1_Mutation | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 146 | \n",
+ " Log2_variant_per_Mbp | \n",
+ " Progeny_MAPK | \n",
+ "
\n",
+ " \n",
+ " 147 | \n",
+ " Log2_SNP_per_Mbp | \n",
+ " Progeny_JAK.STAT | \n",
+ "
\n",
+ " \n",
+ " 148 | \n",
+ " Log2_INDEL_per_Mbp | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ "
\n",
+ " \n",
+ " 149 | \n",
+ " Log2_variant_total | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 150 | \n",
+ " Log2_SNP_total | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 151 | \n",
+ " Log2_INDEL_total | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 152 | \n",
+ " Genomics_subtype | \n",
+ " Genomic_subtype | \n",
+ "
\n",
+ " \n",
+ " 153 | \n",
+ " Mutation_signature_C>A | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 154 | \n",
+ " Mutation_signature_C>G | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 155 | \n",
+ " Mutation_signature_C>T | \n",
+ " Estimate_ImmuneScore | \n",
+ "
\n",
+ " \n",
+ " 156 | \n",
+ " Mutation_signature_T>C | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 157 | \n",
+ " Mutation_signature_T>A | \n",
+ " Progeny_Hypoxia | \n",
+ "
\n",
+ " \n",
+ " 158 | \n",
+ " Mutation_signature_T>G | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 159 | \n",
+ " WXS_normal_sample_type | \n",
+ " Ancillary_studies_other_immunohistochemistry_p... | \n",
+ "
\n",
+ " \n",
+ " 160 | \n",
+ " WXS_normal_filename | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 161 | \n",
+ " WXS_normal_UUID | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 162 | \n",
+ " WXS_tumor_sample_type | \n",
+ " Tumor_necrosis | \n",
+ "
\n",
+ " \n",
+ " 163 | \n",
+ " WXS_tumor_filename | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 164 | \n",
+ " WXS_tumor_UUID | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 165 | \n",
+ " WGS_normal_sample_type | \n",
+ " Group | \n",
+ "
\n",
+ " \n",
+ " 166 | \n",
+ " WGS_normal_UUID | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 167 | \n",
+ " WGS_tumor_sample_type | \n",
+ " Tumor_site | \n",
+ "
\n",
+ " \n",
+ " 168 | \n",
+ " WGS_tumor_UUID | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 169 | \n",
+ " RNAseq_R1_sample_type | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 170 | \n",
+ " RNAseq_R1_filename | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 171 | \n",
+ " RNAseq_R1_UUID | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 172 | \n",
+ " RNAseq_R2_sample_type | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 173 | \n",
+ " RNAseq_R2_filename | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 174 | \n",
+ " RNAseq_R2_UUID | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 175 | \n",
+ " miRNAseq_sample_type | \n",
+ " Mutation_signature_SBS7a | \n",
+ "
\n",
+ " \n",
+ " 176 | \n",
+ " miRNAseq_UUID | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 177 | \n",
+ " Methylation_available | \n",
+ " Group | \n",
+ "
\n",
+ " \n",
+ " 178 | \n",
+ " Methylation_quality | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source \\\n",
+ "0 idx \n",
+ "1 Proteomics_Participant_ID \n",
+ "2 Case_excluded \n",
+ "3 Proteomics_TMT_batch \n",
+ "4 Proteomics_TMT_plex \n",
+ "5 Proteomics_TMT_channel \n",
+ "6 Proteomics_Parent_Sample_IDs \n",
+ "7 Proteomics_Aliquot_ID \n",
+ "8 Proteomics_Tumor_Normal \n",
+ "9 Proteomics_OCT \n",
+ "10 Country \n",
+ "11 Histologic_Grade_FIGO \n",
+ "12 Myometrial_invasion_Specify \n",
+ "13 Histologic_type \n",
+ "14 Treatment_naive \n",
+ "15 Tumor_purity \n",
+ "16 Path_Stage_Primary_Tumor-pT \n",
+ "17 Path_Stage_Reg_Lymph_Nodes-pN \n",
+ "18 Clin_Stage_Dist_Mets-cM \n",
+ "19 Path_Stage_Dist_Mets-pM \n",
+ "20 tumor_Stage-Pathological \n",
+ "21 FIGO_stage \n",
+ "22 LVSI \n",
+ "23 BMI \n",
+ "24 Age \n",
+ "25 Diabetes \n",
+ "26 Race \n",
+ "27 Ethnicity \n",
+ "28 Gender \n",
+ "29 Tumor_Site \n",
+ "30 Tumor_Site_Other \n",
+ "31 Tumor_Focality \n",
+ "32 Tumor_Size_cm \n",
+ "33 Estrogen_Receptor \n",
+ "34 Estrogen_Receptor_% \n",
+ "35 Progesterone_Receptor \n",
+ "36 Progesterone_Receptor_% \n",
+ "37 MLH1 \n",
+ "38 MLH2 \n",
+ "39 MSH6 \n",
+ "40 PMS2 \n",
+ "41 p53 \n",
+ "42 Other_IHC_specify \n",
+ "43 MLH1_Promoter_Hypermethylation \n",
+ "44 Num_full_term_pregnancies \n",
+ "45 EPIC_Bcells \n",
+ "46 EPIC_CAFs \n",
+ "47 EPIC_CD4_Tcells \n",
+ "48 EPIC_CD8_Tcells \n",
+ "49 EPIC_Endothelial \n",
+ "50 EPIC_Macrophages \n",
+ "51 EPIC_NKcells \n",
+ "52 EPIC_otherCells \n",
+ "53 CIBERSORT_B_cells_naive \n",
+ "54 CIBERSORT_B_cells_memory \n",
+ "55 CIBERSORT_Plasma_cells \n",
+ "56 CIBERSORT_T_cells_CD8 \n",
+ "57 CIBERSORT_T_cells_CD4_naive \n",
+ "58 CIBERSORT_T_cells_CD4_memory_resting \n",
+ "59 CIBERSORT_T_cells_CD4_memory_activated \n",
+ "60 CIBERSORT_T_cells_follicular_helper \n",
+ "61 CIBERSORT_T_cells_regulatory_(Tregs) \n",
+ "62 CIBERSORT_T_cells_gamma_delta \n",
+ "63 CIBERSORT_NK_cells_resting \n",
+ "64 CIBERSORT_NK_cells_activated \n",
+ "65 CIBERSORT_Monocytes \n",
+ "66 CIBERSORT_Macrophages_M0 \n",
+ "67 CIBERSORT_Macrophages_M1 \n",
+ "68 CIBERSORT_Macrophages_M2 \n",
+ "69 CIBERSORT_Dendritic_cells_resting \n",
+ "70 CIBERSORT_Dendritic_cells_activated \n",
+ "71 CIBERSORT_Mast_cells_resting \n",
+ "72 CIBERSORT_Mast_cells_activated \n",
+ "73 CIBERSORT_Eosinophils \n",
+ "74 CIBERSORT_Neutrophils \n",
+ "75 CIBERSORT_Absolute_score \n",
+ "76 ESTIMATE_StromalScore \n",
+ "77 ESTIMATE_ImmuneScore \n",
+ "78 ESTIMATE_ESTIMATEScore \n",
+ "79 Stemness_score \n",
+ "80 ER_ESR1 \n",
+ "81 PR_PGR \n",
+ "82 Pathway_activity_EGFR \n",
+ "83 Pathway_activity_Hypoxia \n",
+ "84 Pathway_activity_JAK.STAT \n",
+ "85 Pathway_activity_MAPK \n",
+ "86 Pathway_activity_NFkB \n",
+ "87 Pathway_activity_PI3K \n",
+ "88 Pathway_activity_TGFb \n",
+ "89 Pathway_activity_TNFa \n",
+ "90 Pathway_activity_Trail \n",
+ "91 Pathway_activity_VEGF \n",
+ "92 Pathway_activity_p53 \n",
+ "93 TP53_ATM \n",
+ "94 TP53_CHEK2 \n",
+ "95 TP53_MDM4 \n",
+ "96 TP53_RPS6KA3 \n",
+ "97 TP53_TP53 \n",
+ "98 TP53_pathway \n",
+ "99 PI3K_AKT1 \n",
+ "100 PI3K_AKT2 \n",
+ "101 PI3K_AKT3 \n",
+ "102 PI3K_DEPDC5 \n",
+ "103 PI3K_DEPTOR \n",
+ "104 PI3K_INPP4B \n",
+ "105 PI3K_MAPKAP1 \n",
+ "106 PI3K_MLST8 \n",
+ "107 PI3K_MTOR \n",
+ "108 PI3K_NPRL2 \n",
+ "109 PI3K_NPRL3 \n",
+ "110 PI3K_PDK1 \n",
+ "111 PI3K_PIK3CA \n",
+ "112 PI3K_PIK3CB \n",
+ "113 PI3K_PIK3R1 \n",
+ "114 PI3K_PIK3R2 \n",
+ "115 PI3K_PPP2R1A \n",
+ "116 PI3K_PTEN \n",
+ "117 PI3K_RHEB \n",
+ "118 PI3K_RICTOR \n",
+ "119 PI3K_RPS6 \n",
+ "120 PI3K_RPS6KB1 \n",
+ "121 PI3K_RPTOR \n",
+ "122 PI3K_STK11 \n",
+ "123 PI3K_TSC1 \n",
+ "124 PI3K_TSC2 \n",
+ "125 PI3K_pathway \n",
+ "126 HRD_BRCA1 \n",
+ "127 HRD_BRCA2 \n",
+ "128 HRD_BRCA1_or_BRCA2 \n",
+ "129 CNV_DEL \n",
+ "130 CNV_AMP \n",
+ "131 CNV_class \n",
+ "132 CNV_idx \n",
+ "133 CNV_1q_DEL \n",
+ "134 CNV_3q_DEL \n",
+ "135 CNV_4q_DEL \n",
+ "136 CNV_1q_AMP \n",
+ "137 CNV_3q_AMP \n",
+ "138 CNV_4q_AMP \n",
+ "139 Purity_Immune \n",
+ "140 Purity_Cancer \n",
+ "141 Purity_Stroma \n",
+ "142 MSI_status \n",
+ "143 POLE_subtype \n",
+ "144 JAK1_MS_INDEL \n",
+ "145 JAK1_Mutation \n",
+ "146 Log2_variant_per_Mbp \n",
+ "147 Log2_SNP_per_Mbp \n",
+ "148 Log2_INDEL_per_Mbp \n",
+ "149 Log2_variant_total \n",
+ "150 Log2_SNP_total \n",
+ "151 Log2_INDEL_total \n",
+ "152 Genomics_subtype \n",
+ "153 Mutation_signature_C>A \n",
+ "154 Mutation_signature_C>G \n",
+ "155 Mutation_signature_C>T \n",
+ "156 Mutation_signature_T>C \n",
+ "157 Mutation_signature_T>A \n",
+ "158 Mutation_signature_T>G \n",
+ "159 WXS_normal_sample_type \n",
+ "160 WXS_normal_filename \n",
+ "161 WXS_normal_UUID \n",
+ "162 WXS_tumor_sample_type \n",
+ "163 WXS_tumor_filename \n",
+ "164 WXS_tumor_UUID \n",
+ "165 WGS_normal_sample_type \n",
+ "166 WGS_normal_UUID \n",
+ "167 WGS_tumor_sample_type \n",
+ "168 WGS_tumor_UUID \n",
+ "169 RNAseq_R1_sample_type \n",
+ "170 RNAseq_R1_filename \n",
+ "171 RNAseq_R1_UUID \n",
+ "172 RNAseq_R2_sample_type \n",
+ "173 RNAseq_R2_filename \n",
+ "174 RNAseq_R2_UUID \n",
+ "175 miRNAseq_sample_type \n",
+ "176 miRNAseq_UUID \n",
+ "177 Methylation_available \n",
+ "178 Methylation_quality \n",
+ "\n",
+ " target \n",
+ "0 xCell_T_cell_CD4+_Th1 \n",
+ "1 Idx \n",
+ "2 Case_excluded \n",
+ "3 Metformin_treatment \n",
+ "4 Number_of_para-aortic_lymph_nodes_positive_for... \n",
+ "5 Number_of_para-aortic_lymph_nodes_positive_for... \n",
+ "6 Idx \n",
+ "7 Aliquot_ID \n",
+ "8 Group \n",
+ "9 POLE \n",
+ "10 Participant_country \n",
+ "11 Histologic_grade \n",
+ "12 Myometrial_invasion_present_specify \n",
+ "13 Histologic_Type \n",
+ "14 Follow-up_additional_treatment_radiation_thera... \n",
+ "15 Tumor_necrosis \n",
+ "16 Pathologic_staging_primary_tumor_pt \n",
+ "17 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "18 Clinical_staging_distant_metastasis_cm \n",
+ "19 Clinical_staging_distant_metastasis_cm \n",
+ "20 Tumor_stage_pathological \n",
+ "21 Tumor_stage_pathological \n",
+ "22 Number_of_other_lymph_nodes_examined \n",
+ "23 BMI \n",
+ "24 Age \n",
+ "25 Diabetes \n",
+ "26 Race \n",
+ "27 Ethnicity \n",
+ "28 Sex \n",
+ "29 Tumor_site \n",
+ "30 Tumor_site_other \n",
+ "31 Tumor_focality \n",
+ "32 Tumor_size_cm \n",
+ "33 Ancillary_studies_estrogen_receptor \n",
+ "34 Ancillary_studies_estrogen_receptor \n",
+ "35 Ancillary_studies_progesterone_receptor \n",
+ "36 Ancillary_studies_progesterone_receptor \n",
+ "37 Ancillary_studies_mlh1 \n",
+ "38 Ancillary_studies_mlh1 \n",
+ "39 Ancillary_studies_msh2 \n",
+ "40 Ancillary_studies_pms2 \n",
+ "41 Ancillary_studies_p53 \n",
+ "42 Ancillary_studies_other_immunohistochemistry_t... \n",
+ "43 Ancillary_studies_mlh1_promoter_hypermethylation \n",
+ "44 Donor_information_number_of_full_term_pregnancies \n",
+ "45 Progeny_Androgen \n",
+ "46 CNV_ratio \n",
+ "47 Aliquot_ID \n",
+ "48 xCell_T_cell_CD4+_Th2 \n",
+ "49 Progeny_Androgen \n",
+ "50 Cibersort_Macrophage_M2 \n",
+ "51 xCell_NK_cell \n",
+ "52 Estimate_ESTIMATEScore \n",
+ "53 Cibersort_T_cell_CD4+_memory_resting \n",
+ "54 xCell_Cancer_associated_fibroblast \n",
+ "55 Progeny_PI3K \n",
+ "56 Cibersort_T_cell_CD4+_memory_resting \n",
+ "57 Cibersort_T_cell_CD4+_naive \n",
+ "58 CNV_ratio \n",
+ "59 xCell_T_cell_CD4+_Th2 \n",
+ "60 Cibersort_T_cell_CD4+_memory_resting \n",
+ "61 Cibersort_T_cell_CD4+_memory_resting \n",
+ "62 xCell_T_cell_CD4+_Th2 \n",
+ "63 Cibersort_NK_cell_activated \n",
+ "64 Cibersort_NK_cell_activated \n",
+ "65 Cibersort_Macrophage_M2 \n",
+ "66 Cibersort_Macrophage_M0 \n",
+ "67 Cibersort_Macrophage_M1 \n",
+ "68 Cibersort_Macrophage_M2 \n",
+ "69 xCell_Cancer_associated_fibroblast \n",
+ "70 Cibersort_T_cell_CD4+_memory_resting \n",
+ "71 Cibersort_Mast_cell_activated \n",
+ "72 Cibersort_Mast_cell_activated \n",
+ "73 xCell_Cancer_associated_fibroblast \n",
+ "74 CNV_ratio \n",
+ "75 Cibersort_T_cell_follicular_helper \n",
+ "76 Estimate_StromalScore \n",
+ "77 Estimate_ESTIMATEScore \n",
+ "78 Estimate_ESTIMATEScore \n",
+ "79 Progeny_Androgen \n",
+ "80 xCell_stroma_score \n",
+ "81 Progeny_EGFR \n",
+ "82 Ancillary_studies_progesterone_receptor \n",
+ "83 xCell_immune_score \n",
+ "84 Mutation_signature_SBS5 \n",
+ "85 Progeny_MAPK \n",
+ "86 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "87 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "88 ABSOLUTE_tumor_purity \n",
+ "89 Pathologic_staging_primary_tumor_pt \n",
+ "90 Pathologic_staging_primary_tumor_pt \n",
+ "91 Progeny_VEGF \n",
+ "92 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "93 TP53 \n",
+ "94 Ancillary_studies_pms2 \n",
+ "95 TP53 \n",
+ "96 Ancillary_studies_pms2 \n",
+ "97 TP53 \n",
+ "98 TP53 \n",
+ "99 PIK3CA \n",
+ "100 PIK3CA \n",
+ "101 PIK3CA \n",
+ "102 PIK3CA \n",
+ "103 PIK3CA \n",
+ "104 PIK3CA \n",
+ "105 PIK3CA \n",
+ "106 Ancillary_studies_pms2 \n",
+ "107 PIK3CA \n",
+ "108 PIK3CA \n",
+ "109 PIK3CA \n",
+ "110 PIK3CA \n",
+ "111 PIK3CA \n",
+ "112 PIK3CA \n",
+ "113 PIK3CA \n",
+ "114 PIK3CA \n",
+ "115 PIK3CA \n",
+ "116 PIK3CA \n",
+ "117 PIK3CA \n",
+ "118 PIK3CA \n",
+ "119 PIK3CA \n",
+ "120 PIK3CA \n",
+ "121 PIK3CA \n",
+ "122 PIK3CA \n",
+ "123 PIK3CA \n",
+ "124 PIK3CA \n",
+ "125 PIK3CA \n",
+ "126 ARID1A \n",
+ "127 ARID1A \n",
+ "128 Batch \n",
+ "129 CNV_ratio \n",
+ "130 CNV_ratio \n",
+ "131 CNV_status \n",
+ "132 CNV_ratio \n",
+ "133 CNV_ratio \n",
+ "134 CNV_ratio \n",
+ "135 CNV_ratio \n",
+ "136 CNV_ratio \n",
+ "137 CNV_ratio \n",
+ "138 CNV_ratio \n",
+ "139 Progeny_Androgen \n",
+ "140 Progeny_Hypoxia \n",
+ "141 Progeny_Androgen \n",
+ "142 MSI_status \n",
+ "143 POLE \n",
+ "144 MSI_status \n",
+ "145 Clinical_staging_distant_metastasis_cm \n",
+ "146 Progeny_MAPK \n",
+ "147 Progeny_JAK.STAT \n",
+ "148 Cibersort_Macrophage_M0 \n",
+ "149 Estimate_ESTIMATEScore \n",
+ "150 CNV_ratio \n",
+ "151 Estimate_ESTIMATEScore \n",
+ "152 Genomic_subtype \n",
+ "153 CNV_ratio \n",
+ "154 Cibersort_Macrophage_M2 \n",
+ "155 Estimate_ImmuneScore \n",
+ "156 Cibersort_Macrophage_M2 \n",
+ "157 Progeny_Hypoxia \n",
+ "158 Progeny_TGFb \n",
+ "159 Ancillary_studies_other_immunohistochemistry_p... \n",
+ "160 Progeny_WNT \n",
+ "161 Progeny_WNT \n",
+ "162 Tumor_necrosis \n",
+ "163 Progeny_TGFb \n",
+ "164 Progeny_WNT \n",
+ "165 Group \n",
+ "166 Progeny_WNT \n",
+ "167 Tumor_site \n",
+ "168 Progeny_TGFb \n",
+ "169 ARID1A \n",
+ "170 Aliquot_ID \n",
+ "171 Aliquot_ID \n",
+ "172 ARID1A \n",
+ "173 Aliquot_ID \n",
+ "174 Aliquot_ID \n",
+ "175 Mutation_signature_SBS7a \n",
+ "176 Batch \n",
+ "177 Group \n",
+ "178 MSI_status "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "schema_mapping = bdi.match_schema(df_source, df_target, method='ct_learning')\n",
+ "print(f\"Recall: {100*accuracy(gt_set, schema_mapping):.3f}\")\n",
+ "schema_mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 179 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fe32ed0bab28433ea1383cb720417793",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/179 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 213 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "791829c5722e45f9b247e06cee321d87",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/213 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ER: Proteomics_TMT_batch -> out=Metformin_treatment gt={'Batch'}\n",
+ "ER: Proteomics_TMT_plex -> out=Number_of_para-aortic_lymph_nodes_positive_for_tumor_by_he gt={'Plex'}\n",
+ "ER: Treatment_naive -> out=Follow-up_additional_treatment_radiation_therapy_for_new_tumor gt={'Cancer_history_history_of_any_treatment'}\n",
+ "ER: Tumor_purity -> out=Tumor_necrosis gt={'ABSOLUTE_tumor_purity'}\n",
+ "ER: MSH6 -> out=Ancillary_studies_msh2 gt={'Ancillary_studies_msh6'}\n",
+ "ER: CIBERSORT_Monocytes -> out=Cibersort_Macrophage_M2 gt={'Cibersort_Monocyte'}\n",
+ "ER: CIBERSORT_Eosinophils -> out=xCell_Cancer_associated_fibroblast gt={'Cibersort_Eosinophil'}\n",
+ "ER: CIBERSORT_Neutrophils -> out=CNV_ratio gt={'Cibersort_Neutrophil'}\n",
+ "ER: ESTIMATE_ImmuneScore -> out=Estimate_ESTIMATEScore gt={'Estimate_ImmuneScore'}\n",
+ "Recall: 80.000\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " idx | \n",
+ " Number_of_para-aortic_lymph_nodes_examined | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Case_excluded | \n",
+ " Case_excluded | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Proteomics_TMT_batch | \n",
+ " Metformin_treatment | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Proteomics_TMT_plex | \n",
+ " Number_of_para-aortic_lymph_nodes_positive_for... | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Proteomics_TMT_channel | \n",
+ " ReporterName | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Proteomics_Parent_Sample_IDs | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Proteomics_Aliquot_ID | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Proteomics_Tumor_Normal | \n",
+ " Group | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Proteomics_OCT | \n",
+ " POLE | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Country | \n",
+ " Participant_country | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Histologic_Grade_FIGO | \n",
+ " Histologic_grade | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Myometrial_invasion_Specify | \n",
+ " Myometrial_invasion_present_specify | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Histologic_type | \n",
+ " Histologic_Type | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Treatment_naive | \n",
+ " Follow-up_additional_treatment_radiation_thera... | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Tumor_purity | \n",
+ " Tumor_necrosis | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Clin_Stage_Dist_Mets-cM | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Path_Stage_Dist_Mets-pM | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " tumor_Stage-Pathological | \n",
+ " Tumor_stage_pathological | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " FIGO_stage | \n",
+ " Tumor_stage_pathological | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " LVSI | \n",
+ " Number_of_other_lymph_nodes_examined | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " BMI | \n",
+ " BMI | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Age | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Diabetes | \n",
+ " Diabetes | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Race | \n",
+ " Race | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Ethnicity | \n",
+ " Ethnicity | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Gender | \n",
+ " Sex | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Tumor_Site | \n",
+ " Tumor_site | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Tumor_Site_Other | \n",
+ " Tumor_site_other | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Tumor_Focality | \n",
+ " Tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Tumor_Size_cm | \n",
+ " Tumor_size_cm | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Estrogen_Receptor | \n",
+ " Ancillary_studies_estrogen_receptor | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Estrogen_Receptor_% | \n",
+ " Ancillary_studies_estrogen_receptor | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " Progesterone_Receptor | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Progesterone_Receptor_% | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " MLH1 | \n",
+ " Ancillary_studies_mlh1 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " MLH2 | \n",
+ " Ancillary_studies_mlh1 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " MSH6 | \n",
+ " Ancillary_studies_msh2 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " PMS2 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " p53 | \n",
+ " Ancillary_studies_p53 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " Other_IHC_specify | \n",
+ " Ancillary_studies_other_immunohistochemistry_t... | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " MLH1_Promoter_Hypermethylation | \n",
+ " Ancillary_studies_mlh1_promoter_hypermethylation | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " Num_full_term_pregnancies | \n",
+ " Donor_information_number_of_full_term_pregnancies | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " EPIC_Bcells | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " EPIC_CAFs | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " EPIC_CD4_Tcells | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " EPIC_CD8_Tcells | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " EPIC_Endothelial | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " EPIC_Macrophages | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " EPIC_NKcells | \n",
+ " xCell_NK_cell | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " EPIC_otherCells | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " CIBERSORT_B_cells_naive | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " CIBERSORT_B_cells_memory | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " CIBERSORT_Plasma_cells | \n",
+ " Progeny_PI3K | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " CIBERSORT_T_cells_CD8 | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " CIBERSORT_T_cells_CD4_naive | \n",
+ " Cibersort_T_cell_CD4+_naive | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " CIBERSORT_T_cells_CD4_memory_resting | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " CIBERSORT_T_cells_CD4_memory_activated | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " CIBERSORT_T_cells_follicular_helper | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " CIBERSORT_T_cells_regulatory_(Tregs) | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " CIBERSORT_T_cells_gamma_delta | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " CIBERSORT_NK_cells_resting | \n",
+ " Cibersort_NK_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " CIBERSORT_NK_cells_activated | \n",
+ " Cibersort_NK_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " CIBERSORT_Monocytes | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " CIBERSORT_Macrophages_M0 | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " CIBERSORT_Macrophages_M1 | \n",
+ " Cibersort_Macrophage_M1 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " CIBERSORT_Macrophages_M2 | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " CIBERSORT_Dendritic_cells_resting | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " CIBERSORT_Dendritic_cells_activated | \n",
+ " Cibersort_T_cell_CD4+_memory_resting | \n",
+ "
\n",
+ " \n",
+ " 71 | \n",
+ " CIBERSORT_Mast_cells_resting | \n",
+ " Cibersort_Mast_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " CIBERSORT_Mast_cells_activated | \n",
+ " Cibersort_Mast_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " CIBERSORT_Eosinophils | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " CIBERSORT_Neutrophils | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " CIBERSORT_Absolute_score | \n",
+ " Cibersort_T_cell_follicular_helper | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " ESTIMATE_StromalScore | \n",
+ " Estimate_StromalScore | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " ESTIMATE_ImmuneScore | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " ESTIMATE_ESTIMATEScore | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " Stemness_score | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " ER_ESR1 | \n",
+ " xCell_stroma_score | \n",
+ "
\n",
+ " \n",
+ " 81 | \n",
+ " PR_PGR | \n",
+ " Progeny_EGFR | \n",
+ "
\n",
+ " \n",
+ " 82 | \n",
+ " Pathway_activity_EGFR | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 83 | \n",
+ " Pathway_activity_Hypoxia | \n",
+ " xCell_immune_score | \n",
+ "
\n",
+ " \n",
+ " 84 | \n",
+ " Pathway_activity_JAK.STAT | \n",
+ " Mutation_signature_SBS5 | \n",
+ "
\n",
+ " \n",
+ " 85 | \n",
+ " Pathway_activity_MAPK | \n",
+ " Progeny_MAPK | \n",
+ "
\n",
+ " \n",
+ " 86 | \n",
+ " Pathway_activity_NFkB | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 87 | \n",
+ " Pathway_activity_PI3K | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " Pathway_activity_TGFb | \n",
+ " ABSOLUTE_tumor_purity | \n",
+ "
\n",
+ " \n",
+ " 89 | \n",
+ " Pathway_activity_TNFa | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 90 | \n",
+ " Pathway_activity_Trail | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 91 | \n",
+ " Pathway_activity_VEGF | \n",
+ " Progeny_VEGF | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " Pathway_activity_p53 | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 93 | \n",
+ " TP53_ATM | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 94 | \n",
+ " TP53_CHEK2 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " TP53_MDM4 | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " TP53_RPS6KA3 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " TP53_TP53 | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " TP53_pathway | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " PI3K_AKT1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " PI3K_AKT2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " PI3K_AKT3 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " PI3K_DEPDC5 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " PI3K_DEPTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 104 | \n",
+ " PI3K_INPP4B | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 105 | \n",
+ " PI3K_MAPKAP1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 106 | \n",
+ " PI3K_MLST8 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 107 | \n",
+ " PI3K_MTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 108 | \n",
+ " PI3K_NPRL2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 109 | \n",
+ " PI3K_NPRL3 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 110 | \n",
+ " PI3K_PDK1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 111 | \n",
+ " PI3K_PIK3CA | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 112 | \n",
+ " PI3K_PIK3CB | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 113 | \n",
+ " PI3K_PIK3R1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 114 | \n",
+ " PI3K_PIK3R2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 115 | \n",
+ " PI3K_PPP2R1A | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 116 | \n",
+ " PI3K_PTEN | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 117 | \n",
+ " PI3K_RHEB | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 118 | \n",
+ " PI3K_RICTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 119 | \n",
+ " PI3K_RPS6 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 120 | \n",
+ " PI3K_RPS6KB1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 121 | \n",
+ " PI3K_RPTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 122 | \n",
+ " PI3K_STK11 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 123 | \n",
+ " PI3K_TSC1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 124 | \n",
+ " PI3K_TSC2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 125 | \n",
+ " PI3K_pathway | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 126 | \n",
+ " HRD_BRCA1 | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 127 | \n",
+ " HRD_BRCA2 | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 128 | \n",
+ " HRD_BRCA1_or_BRCA2 | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 129 | \n",
+ " CNV_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 130 | \n",
+ " CNV_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 131 | \n",
+ " CNV_class | \n",
+ " CNV_status | \n",
+ "
\n",
+ " \n",
+ " 132 | \n",
+ " CNV_idx | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 133 | \n",
+ " CNV_1q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 134 | \n",
+ " CNV_3q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 135 | \n",
+ " CNV_4q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " CNV_1q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 137 | \n",
+ " CNV_3q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 138 | \n",
+ " CNV_4q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 139 | \n",
+ " Purity_Immune | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 140 | \n",
+ " Purity_Cancer | \n",
+ " Progeny_Hypoxia | \n",
+ "
\n",
+ " \n",
+ " 141 | \n",
+ " Purity_Stroma | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 142 | \n",
+ " MSI_status | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ " 143 | \n",
+ " POLE_subtype | \n",
+ " POLE | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " JAK1_MS_INDEL | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ " 145 | \n",
+ " JAK1_Mutation | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 146 | \n",
+ " Log2_variant_per_Mbp | \n",
+ " Progeny_MAPK | \n",
+ "
\n",
+ " \n",
+ " 147 | \n",
+ " Log2_SNP_per_Mbp | \n",
+ " Progeny_JAK.STAT | \n",
+ "
\n",
+ " \n",
+ " 148 | \n",
+ " Log2_INDEL_per_Mbp | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ "
\n",
+ " \n",
+ " 149 | \n",
+ " Log2_variant_total | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 150 | \n",
+ " Log2_SNP_total | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 151 | \n",
+ " Log2_INDEL_total | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 152 | \n",
+ " Genomics_subtype | \n",
+ " Genomic_subtype | \n",
+ "
\n",
+ " \n",
+ " 153 | \n",
+ " Mutation_signature_C>A | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 154 | \n",
+ " Mutation_signature_C>G | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 155 | \n",
+ " Mutation_signature_C>T | \n",
+ " Estimate_ImmuneScore | \n",
+ "
\n",
+ " \n",
+ " 156 | \n",
+ " Mutation_signature_T>C | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 157 | \n",
+ " Mutation_signature_T>A | \n",
+ " Progeny_Hypoxia | \n",
+ "
\n",
+ " \n",
+ " 158 | \n",
+ " Mutation_signature_T>G | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 159 | \n",
+ " WXS_normal_sample_type | \n",
+ " Ancillary_studies_other_immunohistochemistry_p... | \n",
+ "
\n",
+ " \n",
+ " 160 | \n",
+ " WXS_normal_filename | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 161 | \n",
+ " WXS_normal_UUID | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 162 | \n",
+ " WXS_tumor_sample_type | \n",
+ " Tumor_necrosis | \n",
+ "
\n",
+ " \n",
+ " 163 | \n",
+ " WXS_tumor_filename | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 164 | \n",
+ " WXS_tumor_UUID | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 165 | \n",
+ " WGS_normal_sample_type | \n",
+ " Group | \n",
+ "
\n",
+ " \n",
+ " 166 | \n",
+ " WGS_normal_UUID | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 167 | \n",
+ " WGS_tumor_sample_type | \n",
+ " Tumor_site | \n",
+ "
\n",
+ " \n",
+ " 168 | \n",
+ " WGS_tumor_UUID | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 169 | \n",
+ " RNAseq_R1_sample_type | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 170 | \n",
+ " RNAseq_R1_filename | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 171 | \n",
+ " RNAseq_R1_UUID | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 172 | \n",
+ " RNAseq_R2_sample_type | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 173 | \n",
+ " RNAseq_R2_filename | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 174 | \n",
+ " RNAseq_R2_UUID | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 175 | \n",
+ " miRNAseq_sample_type | \n",
+ " Mutation_signature_SBS7a | \n",
+ "
\n",
+ " \n",
+ " 176 | \n",
+ " miRNAseq_UUID | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 177 | \n",
+ " Methylation_available | \n",
+ " Group | \n",
+ "
\n",
+ " \n",
+ " 178 | \n",
+ " Methylation_quality | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source \\\n",
+ "0 idx \n",
+ "1 Proteomics_Participant_ID \n",
+ "2 Case_excluded \n",
+ "3 Proteomics_TMT_batch \n",
+ "4 Proteomics_TMT_plex \n",
+ "5 Proteomics_TMT_channel \n",
+ "6 Proteomics_Parent_Sample_IDs \n",
+ "7 Proteomics_Aliquot_ID \n",
+ "8 Proteomics_Tumor_Normal \n",
+ "9 Proteomics_OCT \n",
+ "10 Country \n",
+ "11 Histologic_Grade_FIGO \n",
+ "12 Myometrial_invasion_Specify \n",
+ "13 Histologic_type \n",
+ "14 Treatment_naive \n",
+ "15 Tumor_purity \n",
+ "16 Path_Stage_Primary_Tumor-pT \n",
+ "17 Path_Stage_Reg_Lymph_Nodes-pN \n",
+ "18 Clin_Stage_Dist_Mets-cM \n",
+ "19 Path_Stage_Dist_Mets-pM \n",
+ "20 tumor_Stage-Pathological \n",
+ "21 FIGO_stage \n",
+ "22 LVSI \n",
+ "23 BMI \n",
+ "24 Age \n",
+ "25 Diabetes \n",
+ "26 Race \n",
+ "27 Ethnicity \n",
+ "28 Gender \n",
+ "29 Tumor_Site \n",
+ "30 Tumor_Site_Other \n",
+ "31 Tumor_Focality \n",
+ "32 Tumor_Size_cm \n",
+ "33 Estrogen_Receptor \n",
+ "34 Estrogen_Receptor_% \n",
+ "35 Progesterone_Receptor \n",
+ "36 Progesterone_Receptor_% \n",
+ "37 MLH1 \n",
+ "38 MLH2 \n",
+ "39 MSH6 \n",
+ "40 PMS2 \n",
+ "41 p53 \n",
+ "42 Other_IHC_specify \n",
+ "43 MLH1_Promoter_Hypermethylation \n",
+ "44 Num_full_term_pregnancies \n",
+ "45 EPIC_Bcells \n",
+ "46 EPIC_CAFs \n",
+ "47 EPIC_CD4_Tcells \n",
+ "48 EPIC_CD8_Tcells \n",
+ "49 EPIC_Endothelial \n",
+ "50 EPIC_Macrophages \n",
+ "51 EPIC_NKcells \n",
+ "52 EPIC_otherCells \n",
+ "53 CIBERSORT_B_cells_naive \n",
+ "54 CIBERSORT_B_cells_memory \n",
+ "55 CIBERSORT_Plasma_cells \n",
+ "56 CIBERSORT_T_cells_CD8 \n",
+ "57 CIBERSORT_T_cells_CD4_naive \n",
+ "58 CIBERSORT_T_cells_CD4_memory_resting \n",
+ "59 CIBERSORT_T_cells_CD4_memory_activated \n",
+ "60 CIBERSORT_T_cells_follicular_helper \n",
+ "61 CIBERSORT_T_cells_regulatory_(Tregs) \n",
+ "62 CIBERSORT_T_cells_gamma_delta \n",
+ "63 CIBERSORT_NK_cells_resting \n",
+ "64 CIBERSORT_NK_cells_activated \n",
+ "65 CIBERSORT_Monocytes \n",
+ "66 CIBERSORT_Macrophages_M0 \n",
+ "67 CIBERSORT_Macrophages_M1 \n",
+ "68 CIBERSORT_Macrophages_M2 \n",
+ "69 CIBERSORT_Dendritic_cells_resting \n",
+ "70 CIBERSORT_Dendritic_cells_activated \n",
+ "71 CIBERSORT_Mast_cells_resting \n",
+ "72 CIBERSORT_Mast_cells_activated \n",
+ "73 CIBERSORT_Eosinophils \n",
+ "74 CIBERSORT_Neutrophils \n",
+ "75 CIBERSORT_Absolute_score \n",
+ "76 ESTIMATE_StromalScore \n",
+ "77 ESTIMATE_ImmuneScore \n",
+ "78 ESTIMATE_ESTIMATEScore \n",
+ "79 Stemness_score \n",
+ "80 ER_ESR1 \n",
+ "81 PR_PGR \n",
+ "82 Pathway_activity_EGFR \n",
+ "83 Pathway_activity_Hypoxia \n",
+ "84 Pathway_activity_JAK.STAT \n",
+ "85 Pathway_activity_MAPK \n",
+ "86 Pathway_activity_NFkB \n",
+ "87 Pathway_activity_PI3K \n",
+ "88 Pathway_activity_TGFb \n",
+ "89 Pathway_activity_TNFa \n",
+ "90 Pathway_activity_Trail \n",
+ "91 Pathway_activity_VEGF \n",
+ "92 Pathway_activity_p53 \n",
+ "93 TP53_ATM \n",
+ "94 TP53_CHEK2 \n",
+ "95 TP53_MDM4 \n",
+ "96 TP53_RPS6KA3 \n",
+ "97 TP53_TP53 \n",
+ "98 TP53_pathway \n",
+ "99 PI3K_AKT1 \n",
+ "100 PI3K_AKT2 \n",
+ "101 PI3K_AKT3 \n",
+ "102 PI3K_DEPDC5 \n",
+ "103 PI3K_DEPTOR \n",
+ "104 PI3K_INPP4B \n",
+ "105 PI3K_MAPKAP1 \n",
+ "106 PI3K_MLST8 \n",
+ "107 PI3K_MTOR \n",
+ "108 PI3K_NPRL2 \n",
+ "109 PI3K_NPRL3 \n",
+ "110 PI3K_PDK1 \n",
+ "111 PI3K_PIK3CA \n",
+ "112 PI3K_PIK3CB \n",
+ "113 PI3K_PIK3R1 \n",
+ "114 PI3K_PIK3R2 \n",
+ "115 PI3K_PPP2R1A \n",
+ "116 PI3K_PTEN \n",
+ "117 PI3K_RHEB \n",
+ "118 PI3K_RICTOR \n",
+ "119 PI3K_RPS6 \n",
+ "120 PI3K_RPS6KB1 \n",
+ "121 PI3K_RPTOR \n",
+ "122 PI3K_STK11 \n",
+ "123 PI3K_TSC1 \n",
+ "124 PI3K_TSC2 \n",
+ "125 PI3K_pathway \n",
+ "126 HRD_BRCA1 \n",
+ "127 HRD_BRCA2 \n",
+ "128 HRD_BRCA1_or_BRCA2 \n",
+ "129 CNV_DEL \n",
+ "130 CNV_AMP \n",
+ "131 CNV_class \n",
+ "132 CNV_idx \n",
+ "133 CNV_1q_DEL \n",
+ "134 CNV_3q_DEL \n",
+ "135 CNV_4q_DEL \n",
+ "136 CNV_1q_AMP \n",
+ "137 CNV_3q_AMP \n",
+ "138 CNV_4q_AMP \n",
+ "139 Purity_Immune \n",
+ "140 Purity_Cancer \n",
+ "141 Purity_Stroma \n",
+ "142 MSI_status \n",
+ "143 POLE_subtype \n",
+ "144 JAK1_MS_INDEL \n",
+ "145 JAK1_Mutation \n",
+ "146 Log2_variant_per_Mbp \n",
+ "147 Log2_SNP_per_Mbp \n",
+ "148 Log2_INDEL_per_Mbp \n",
+ "149 Log2_variant_total \n",
+ "150 Log2_SNP_total \n",
+ "151 Log2_INDEL_total \n",
+ "152 Genomics_subtype \n",
+ "153 Mutation_signature_C>A \n",
+ "154 Mutation_signature_C>G \n",
+ "155 Mutation_signature_C>T \n",
+ "156 Mutation_signature_T>C \n",
+ "157 Mutation_signature_T>A \n",
+ "158 Mutation_signature_T>G \n",
+ "159 WXS_normal_sample_type \n",
+ "160 WXS_normal_filename \n",
+ "161 WXS_normal_UUID \n",
+ "162 WXS_tumor_sample_type \n",
+ "163 WXS_tumor_filename \n",
+ "164 WXS_tumor_UUID \n",
+ "165 WGS_normal_sample_type \n",
+ "166 WGS_normal_UUID \n",
+ "167 WGS_tumor_sample_type \n",
+ "168 WGS_tumor_UUID \n",
+ "169 RNAseq_R1_sample_type \n",
+ "170 RNAseq_R1_filename \n",
+ "171 RNAseq_R1_UUID \n",
+ "172 RNAseq_R2_sample_type \n",
+ "173 RNAseq_R2_filename \n",
+ "174 RNAseq_R2_UUID \n",
+ "175 miRNAseq_sample_type \n",
+ "176 miRNAseq_UUID \n",
+ "177 Methylation_available \n",
+ "178 Methylation_quality \n",
+ "\n",
+ " target \n",
+ "0 Number_of_para-aortic_lymph_nodes_examined \n",
+ "1 Case_id \n",
+ "2 Case_excluded \n",
+ "3 Metformin_treatment \n",
+ "4 Number_of_para-aortic_lymph_nodes_positive_for... \n",
+ "5 ReporterName \n",
+ "6 Case_id \n",
+ "7 Aliquot_ID \n",
+ "8 Group \n",
+ "9 POLE \n",
+ "10 Participant_country \n",
+ "11 Histologic_grade \n",
+ "12 Myometrial_invasion_present_specify \n",
+ "13 Histologic_Type \n",
+ "14 Follow-up_additional_treatment_radiation_thera... \n",
+ "15 Tumor_necrosis \n",
+ "16 Pathologic_staging_primary_tumor_pt \n",
+ "17 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "18 Clinical_staging_distant_metastasis_cm \n",
+ "19 Clinical_staging_distant_metastasis_cm \n",
+ "20 Tumor_stage_pathological \n",
+ "21 Tumor_stage_pathological \n",
+ "22 Number_of_other_lymph_nodes_examined \n",
+ "23 BMI \n",
+ "24 Age \n",
+ "25 Diabetes \n",
+ "26 Race \n",
+ "27 Ethnicity \n",
+ "28 Sex \n",
+ "29 Tumor_site \n",
+ "30 Tumor_site_other \n",
+ "31 Tumor_focality \n",
+ "32 Tumor_size_cm \n",
+ "33 Ancillary_studies_estrogen_receptor \n",
+ "34 Ancillary_studies_estrogen_receptor \n",
+ "35 Ancillary_studies_progesterone_receptor \n",
+ "36 Ancillary_studies_progesterone_receptor \n",
+ "37 Ancillary_studies_mlh1 \n",
+ "38 Ancillary_studies_mlh1 \n",
+ "39 Ancillary_studies_msh2 \n",
+ "40 Ancillary_studies_pms2 \n",
+ "41 Ancillary_studies_p53 \n",
+ "42 Ancillary_studies_other_immunohistochemistry_t... \n",
+ "43 Ancillary_studies_mlh1_promoter_hypermethylation \n",
+ "44 Donor_information_number_of_full_term_pregnancies \n",
+ "45 Progeny_Androgen \n",
+ "46 CNV_ratio \n",
+ "47 Aliquot_ID \n",
+ "48 xCell_T_cell_CD4+_Th2 \n",
+ "49 Progeny_Androgen \n",
+ "50 Cibersort_Macrophage_M2 \n",
+ "51 xCell_NK_cell \n",
+ "52 Estimate_ESTIMATEScore \n",
+ "53 Cibersort_T_cell_CD4+_memory_resting \n",
+ "54 xCell_Cancer_associated_fibroblast \n",
+ "55 Progeny_PI3K \n",
+ "56 Cibersort_T_cell_CD4+_memory_resting \n",
+ "57 Cibersort_T_cell_CD4+_naive \n",
+ "58 CNV_ratio \n",
+ "59 xCell_T_cell_CD4+_Th2 \n",
+ "60 Cibersort_T_cell_CD4+_memory_resting \n",
+ "61 Cibersort_T_cell_CD4+_memory_resting \n",
+ "62 xCell_T_cell_CD4+_Th2 \n",
+ "63 Cibersort_NK_cell_activated \n",
+ "64 Cibersort_NK_cell_activated \n",
+ "65 Cibersort_Macrophage_M2 \n",
+ "66 Cibersort_Macrophage_M0 \n",
+ "67 Cibersort_Macrophage_M1 \n",
+ "68 Cibersort_Macrophage_M2 \n",
+ "69 xCell_Cancer_associated_fibroblast \n",
+ "70 Cibersort_T_cell_CD4+_memory_resting \n",
+ "71 Cibersort_Mast_cell_activated \n",
+ "72 Cibersort_Mast_cell_activated \n",
+ "73 xCell_Cancer_associated_fibroblast \n",
+ "74 CNV_ratio \n",
+ "75 Cibersort_T_cell_follicular_helper \n",
+ "76 Estimate_StromalScore \n",
+ "77 Estimate_ESTIMATEScore \n",
+ "78 Estimate_ESTIMATEScore \n",
+ "79 Progeny_Androgen \n",
+ "80 xCell_stroma_score \n",
+ "81 Progeny_EGFR \n",
+ "82 Ancillary_studies_progesterone_receptor \n",
+ "83 xCell_immune_score \n",
+ "84 Mutation_signature_SBS5 \n",
+ "85 Progeny_MAPK \n",
+ "86 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "87 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "88 ABSOLUTE_tumor_purity \n",
+ "89 Pathologic_staging_primary_tumor_pt \n",
+ "90 Pathologic_staging_primary_tumor_pt \n",
+ "91 Progeny_VEGF \n",
+ "92 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "93 TP53 \n",
+ "94 Ancillary_studies_pms2 \n",
+ "95 TP53 \n",
+ "96 Ancillary_studies_pms2 \n",
+ "97 TP53 \n",
+ "98 TP53 \n",
+ "99 PIK3CA \n",
+ "100 PIK3CA \n",
+ "101 PIK3CA \n",
+ "102 PIK3CA \n",
+ "103 PIK3CA \n",
+ "104 PIK3CA \n",
+ "105 PIK3CA \n",
+ "106 Ancillary_studies_pms2 \n",
+ "107 PIK3CA \n",
+ "108 PIK3CA \n",
+ "109 PIK3CA \n",
+ "110 PIK3CA \n",
+ "111 PIK3CA \n",
+ "112 PIK3CA \n",
+ "113 PIK3CA \n",
+ "114 PIK3CA \n",
+ "115 PIK3CA \n",
+ "116 PIK3CA \n",
+ "117 PIK3CA \n",
+ "118 PIK3CA \n",
+ "119 PIK3CA \n",
+ "120 PIK3CA \n",
+ "121 PIK3CA \n",
+ "122 PIK3CA \n",
+ "123 PIK3CA \n",
+ "124 PIK3CA \n",
+ "125 PIK3CA \n",
+ "126 ARID1A \n",
+ "127 ARID1A \n",
+ "128 Batch \n",
+ "129 CNV_ratio \n",
+ "130 CNV_ratio \n",
+ "131 CNV_status \n",
+ "132 CNV_ratio \n",
+ "133 CNV_ratio \n",
+ "134 CNV_ratio \n",
+ "135 CNV_ratio \n",
+ "136 CNV_ratio \n",
+ "137 CNV_ratio \n",
+ "138 CNV_ratio \n",
+ "139 Progeny_Androgen \n",
+ "140 Progeny_Hypoxia \n",
+ "141 Progeny_Androgen \n",
+ "142 MSI_status \n",
+ "143 POLE \n",
+ "144 MSI_status \n",
+ "145 Clinical_staging_distant_metastasis_cm \n",
+ "146 Progeny_MAPK \n",
+ "147 Progeny_JAK.STAT \n",
+ "148 Cibersort_Macrophage_M0 \n",
+ "149 Estimate_ESTIMATEScore \n",
+ "150 CNV_ratio \n",
+ "151 Estimate_ESTIMATEScore \n",
+ "152 Genomic_subtype \n",
+ "153 CNV_ratio \n",
+ "154 Cibersort_Macrophage_M2 \n",
+ "155 Estimate_ImmuneScore \n",
+ "156 Cibersort_Macrophage_M2 \n",
+ "157 Progeny_Hypoxia \n",
+ "158 Progeny_TGFb \n",
+ "159 Ancillary_studies_other_immunohistochemistry_p... \n",
+ "160 Progeny_WNT \n",
+ "161 Progeny_WNT \n",
+ "162 Tumor_necrosis \n",
+ "163 Progeny_TGFb \n",
+ "164 Progeny_WNT \n",
+ "165 Group \n",
+ "166 Progeny_WNT \n",
+ "167 Tumor_site \n",
+ "168 Progeny_TGFb \n",
+ "169 ARID1A \n",
+ "170 Aliquot_ID \n",
+ "171 Aliquot_ID \n",
+ "172 ARID1A \n",
+ "173 Aliquot_ID \n",
+ "174 Aliquot_ID \n",
+ "175 Mutation_signature_SBS7a \n",
+ "176 Batch \n",
+ "177 Group \n",
+ "178 MSI_status "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "schema_mapping = bdi.match_schema(df_source, df_target, method='max_val_sim')\n",
+ "print(f\"Recall: {100*accuracy(gt_set, schema_mapping):.3f}\")\n",
+ "schema_mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 1 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4a7415c43f464277a03431032c884d82",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 213 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6787b5314beb4557bef5b1d63bad3b0b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/213 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Idx | \n",
+ " 0.173261 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Case_id | \n",
+ " 0.158389 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Aliquot_ID | \n",
+ " 0.123715 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Proteomics_Participant_ID | \n",
+ " xCell_T_cell_CD8+_central_memory | \n",
+ " 0.103956 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Progeny_p53 | \n",
+ " 0.102078 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Proteomics_Participant_ID Idx 0.173261\n",
+ "1 Proteomics_Participant_ID Case_id 0.158389\n",
+ "2 Proteomics_Participant_ID Aliquot_ID 0.123715\n",
+ "3 Proteomics_Participant_ID xCell_T_cell_CD8+_central_memory 0.103956\n",
+ "4 Proteomics_Participant_ID Progeny_p53 0.102078"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.top_matches(df_source, columns=[\"Proteomics_Participant_ID\"], target=df_target, top_k=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.201397 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.059600 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.032400 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.055500 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.107774 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name\n",
+ "0 0.201397\n",
+ "1 0.059600\n",
+ "2 0.032400\n",
+ "3 0.055500\n",
+ "4 0.107774"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(df_target, \"Cibersort_T_cell_CD8+\").head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name\n",
+ "0 1.0\n",
+ "1 0.0\n",
+ "2 NaN"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(df_source, \"TP53_TP53\").head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " WT | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Mutated | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name\n",
+ "0 WT\n",
+ "1 Mutated\n",
+ "2 NaN"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(df_target, \"TP53\").head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [source, target, similarity]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(df_source, df_target, (\"TP53_TP53\", \"TP53\"), method='tfidf')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/demo-ucec.ipynb b/examples/demo-ucec.ipynb
new file mode 100644
index 00000000..cff731c4
--- /dev/null
+++ b/examples/demo-ucec.ipynb
@@ -0,0 +1,7901 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Getting Started\n",
+ "\n",
+ "## Data Harmonization with `bdikit`\n",
+ "\n",
+ "Data harmonization is the process of integrating and aligning data from different sources into a consistent format to ensure compatibility and interoperability across data analyses and systems. `bdikit` is a library that helps with key data harmonization steps:\n",
+ "- *Schema Mapping*: In this step, data from various sources are mapped to a unified schema or model. This involves identifying equivalent table columns and establishing relationships between disparate datasets.\n",
+ "- *Value Mapping (Data Standardization)*: This step involves converting data into a common format or structure, using consistent naming conventions, units, and coding systems to ensure uniformity.\n",
+ "\n",
+ "In this example, we describe how `bdikit` can be used to harmonize datasets from two papers:\n",
+ "- Dou et al. (https://pubmed.ncbi.nlm.nih.gov/37567170/)\n",
+ "- Cao et al. (https://www.cell.com/cell/fulltext/S0092-8674(21)00997-1)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Loading the data\n",
+ "\n",
+ "First, import the `bdikit` library and other libraries."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import bdikit as bdi\n",
+ "import pandas as pd\n",
+ "from IPython.display import display, Markdown"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, we load our source data using pandas. The optional column-selection step is commented out in the cell below, so all columns are kept."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " idx | \n",
+ " Proteomics_Participant_ID | \n",
+ " Case_excluded | \n",
+ " Proteomics_TMT_batch | \n",
+ " Proteomics_TMT_plex | \n",
+ " Proteomics_TMT_channel | \n",
+ " Proteomics_Parent_Sample_IDs | \n",
+ " Proteomics_Aliquot_ID | \n",
+ " Proteomics_Tumor_Normal | \n",
+ " Proteomics_OCT | \n",
+ " ... | \n",
+ " RNAseq_R1_sample_type | \n",
+ " RNAseq_R1_filename | \n",
+ " RNAseq_R1_UUID | \n",
+ " RNAseq_R2_sample_type | \n",
+ " RNAseq_R2_filename | \n",
+ " RNAseq_R2_UUID | \n",
+ " miRNAseq_sample_type | \n",
+ " miRNAseq_UUID | \n",
+ " Methylation_available | \n",
+ " Methylation_quality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " S001 | \n",
+ " C3L-00006 | \n",
+ " No | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 128N | \n",
+ " C3L-00006-01 | \n",
+ " CPT0001460012 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_TAGCTT_S17... | \n",
+ " 8a1efc47-1c29-417f-a425-cdbd09565dcb | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_TAGCTT_S17... | \n",
+ " 8c3fe9b7-7acd-4867-8d9c-a8e5d1516eda | \n",
+ " Tumor | \n",
+ " 37bcba98-1094-459e-83ae-c23a602416fb | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " S002 | \n",
+ " C3L-00008 | \n",
+ " No | \n",
+ " 4 | \n",
+ " 16 | \n",
+ " 130N | \n",
+ " C3L-00008-01 | \n",
+ " CPT0001300009 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_GGCTAC_S22... | \n",
+ " 555725e8-cba5-4676-9b0a-80100cbf9f47 | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_GGCTAC_S22... | \n",
+ " 15235b12-b67a-4678-acc4-ed03d642bd5e | \n",
+ " Tumor | \n",
+ " 492b50d8-ec35-46e7-a65d-06512aaee394 | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " S003 | \n",
+ " C3L-00032 | \n",
+ " No | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 131 | \n",
+ " C3L-00032-01 | \n",
+ " CPT0001420009 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_GTCCGC_S18... | \n",
+ " 9ae968f3-691d-4db3-9977-1ab3e5af9085 | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_GTCCGC_S18... | \n",
+ " 423b6b09-02aa-4f47-9241-f75c1dad1161 | \n",
+ " Tumor | \n",
+ " 1794ff56-db2d-4d1a-8758-cab7fe3d98c1 | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " S004 | \n",
+ " C3L-00084 | \n",
+ " Yes | \n",
+ " 3 | \n",
+ " 11 | \n",
+ " 129N | \n",
+ " C3L-00084-01 | \n",
+ " CPT0000820012 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_... | \n",
+ " b0a7cdf2-2ad8-4442-91b0-548ea4975554 | \n",
+ " Tumor | \n",
+ " 170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_... | \n",
+ " c83987a5-1c13-4af4-b46c-218fe5f60c34 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " S005 | \n",
+ " C3L-00090 | \n",
+ " No | \n",
+ " 3 | \n",
+ " 12 | \n",
+ " 129C | \n",
+ " C3L-00090-01 | \n",
+ " CPT0001140003 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_GAGTGG_S10... | \n",
+ " 8ce5618d-9ff6-40f9-aeea-8d8e1633ae38 | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_GAGTGG_S10... | \n",
+ " 06d3fd4a-a623-4146-8500-4f1f17235253 | \n",
+ " Tumor | \n",
+ " a6524c2d-d7dd-4629-980e-b45dbdc92c49 | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " S006 | \n",
+ " C3L-00098 | \n",
+ " No | \n",
+ " 4 | \n",
+ " 14 | \n",
+ " 129N | \n",
+ " C3L-00098-02 | \n",
+ " CPT0000980012 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_TTAGGC_S8_... | \n",
+ " 31252ba9-e052-4b77-809a-f936379ae00c | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_TTAGGC_S8_... | \n",
+ " 23be22ae-de50-4d74-a7c0-c890adbc662a | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " S007 | \n",
+ " C3L-00136 | \n",
+ " No | \n",
+ " 4 | \n",
+ " 16 | \n",
+ " 129C | \n",
+ " C3L-00136-03 | \n",
+ " CPT0000730011 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170818_UNC32-K00270_0050_AHL2FHBBXX_GTCCGC_S10... | \n",
+ " df0e2942-c702-4135-81a0-fbec4439d753 | \n",
+ " Tumor | \n",
+ " 170818_UNC32-K00270_0050_AHL2FHBBXX_GTCCGC_S10... | \n",
+ " 4e1ad404-4646-4828-91b9-e3c35a4ce505 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " S008 | \n",
+ " C3L-00137 | \n",
+ " No | \n",
+ " 4 | \n",
+ " 15 | \n",
+ " 130N | \n",
+ " C3L-00137-02 | \n",
+ " CPT0002010011 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170818_UNC32-K00270_0050_AHL2FHBBXX_GTGAAA_S12... | \n",
+ " 8fcdd6a1-a7c7-41b5-8b44-e41f2237b236 | \n",
+ " Tumor | \n",
+ " 170818_UNC32-K00270_0050_AHL2FHBBXX_GTGAAA_S12... | \n",
+ " 2bea607d-6eb2-4583-90d7-7823a3d8a572 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " S009 | \n",
+ " C3L-00139 | \n",
+ " No | \n",
+ " 3 | \n",
+ " 11 | \n",
+ " 130N | \n",
+ " C3L-00139-01 | \n",
+ " CPT0001850012 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_CAGATC_S1_... | \n",
+ " 7785d5a1-a60d-41f9-86f3-e4ebc100704c | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_CAGATC_S1_... | \n",
+ " 90ced367-0342-4739-93b2-4b1a4af800c4 | \n",
+ " Tumor | \n",
+ " a02b2784-9e7f-41b1-8e53-707ae4371c45 | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " S010 | \n",
+ " C3L-00143 | \n",
+ " No | \n",
+ " 4 | \n",
+ " 14 | \n",
+ " 130C | \n",
+ " C3L-00143-01 | \n",
+ " CPT0001910016 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " ... | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_ACTTGA_S4_... | \n",
+ " 6412838b-2f70-4b14-a6ee-3c7baca09fb0 | \n",
+ " Tumor | \n",
+ " 170802_UNC31-K00269_0072_AHK3GVBBXX_ACTTGA_S4_... | \n",
+ " 5d0a26e0-2739-4f38-9350-c685b44911d3 | \n",
+ " Tumor | \n",
+ " 872be4b7-1735-48a6-a3a2-7541ec65ea87 | \n",
+ " YES | \n",
+ " PASS | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10 rows × 179 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " idx Proteomics_Participant_ID Case_excluded Proteomics_TMT_batch \\\n",
+ "0 S001 C3L-00006 No 2 \n",
+ "1 S002 C3L-00008 No 4 \n",
+ "2 S003 C3L-00032 No 1 \n",
+ "3 S004 C3L-00084 Yes 3 \n",
+ "4 S005 C3L-00090 No 3 \n",
+ "5 S006 C3L-00098 No 4 \n",
+ "6 S007 C3L-00136 No 4 \n",
+ "7 S008 C3L-00137 No 4 \n",
+ "8 S009 C3L-00139 No 3 \n",
+ "9 S010 C3L-00143 No 4 \n",
+ "\n",
+ " Proteomics_TMT_plex Proteomics_TMT_channel Proteomics_Parent_Sample_IDs \\\n",
+ "0 5 128N C3L-00006-01 \n",
+ "1 16 130N C3L-00008-01 \n",
+ "2 2 131 C3L-00032-01 \n",
+ "3 11 129N C3L-00084-01 \n",
+ "4 12 129C C3L-00090-01 \n",
+ "5 14 129N C3L-00098-02 \n",
+ "6 16 129C C3L-00136-03 \n",
+ "7 15 130N C3L-00137-02 \n",
+ "8 11 130N C3L-00139-01 \n",
+ "9 14 130C C3L-00143-01 \n",
+ "\n",
+ " Proteomics_Aliquot_ID Proteomics_Tumor_Normal Proteomics_OCT ... \\\n",
+ "0 CPT0001460012 Tumor No ... \n",
+ "1 CPT0001300009 Tumor No ... \n",
+ "2 CPT0001420009 Tumor No ... \n",
+ "3 CPT0000820012 Tumor No ... \n",
+ "4 CPT0001140003 Tumor No ... \n",
+ "5 CPT0000980012 Tumor No ... \n",
+ "6 CPT0000730011 Tumor No ... \n",
+ "7 CPT0002010011 Tumor No ... \n",
+ "8 CPT0001850012 Tumor No ... \n",
+ "9 CPT0001910016 Tumor No ... \n",
+ "\n",
+ " RNAseq_R1_sample_type RNAseq_R1_filename \\\n",
+ "0 Tumor 170802_UNC31-K00269_0072_AHK3GVBBXX_TAGCTT_S17... \n",
+ "1 Tumor 170802_UNC31-K00269_0072_AHK3GVBBXX_GGCTAC_S22... \n",
+ "2 Tumor 170802_UNC31-K00269_0072_AHK3GVBBXX_GTCCGC_S18... \n",
+ "3 Tumor 170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_... \n",
+ "4 Tumor 170802_UNC31-K00269_0072_AHK3GVBBXX_GAGTGG_S10... \n",
+ "5 Tumor 170802_UNC31-K00269_0072_AHK3GVBBXX_TTAGGC_S8_... \n",
+ "6 Tumor 170818_UNC32-K00270_0050_AHL2FHBBXX_GTCCGC_S10... \n",
+ "7 Tumor 170818_UNC32-K00270_0050_AHL2FHBBXX_GTGAAA_S12... \n",
+ "8 Tumor 170802_UNC31-K00269_0072_AHK3GVBBXX_CAGATC_S1_... \n",
+ "9 Tumor 170802_UNC31-K00269_0072_AHK3GVBBXX_ACTTGA_S4_... \n",
+ "\n",
+ " RNAseq_R1_UUID RNAseq_R2_sample_type \\\n",
+ "0 8a1efc47-1c29-417f-a425-cdbd09565dcb Tumor \n",
+ "1 555725e8-cba5-4676-9b0a-80100cbf9f47 Tumor \n",
+ "2 9ae968f3-691d-4db3-9977-1ab3e5af9085 Tumor \n",
+ "3 b0a7cdf2-2ad8-4442-91b0-548ea4975554 Tumor \n",
+ "4 8ce5618d-9ff6-40f9-aeea-8d8e1633ae38 Tumor \n",
+ "5 31252ba9-e052-4b77-809a-f936379ae00c Tumor \n",
+ "6 df0e2942-c702-4135-81a0-fbec4439d753 Tumor \n",
+ "7 8fcdd6a1-a7c7-41b5-8b44-e41f2237b236 Tumor \n",
+ "8 7785d5a1-a60d-41f9-86f3-e4ebc100704c Tumor \n",
+ "9 6412838b-2f70-4b14-a6ee-3c7baca09fb0 Tumor \n",
+ "\n",
+ " RNAseq_R2_filename \\\n",
+ "0 170802_UNC31-K00269_0072_AHK3GVBBXX_TAGCTT_S17... \n",
+ "1 170802_UNC31-K00269_0072_AHK3GVBBXX_GGCTAC_S22... \n",
+ "2 170802_UNC31-K00269_0072_AHK3GVBBXX_GTCCGC_S18... \n",
+ "3 170818_UNC32-K00270_0050_AHL2FHBBXX_ATCACG_S5_... \n",
+ "4 170802_UNC31-K00269_0072_AHK3GVBBXX_GAGTGG_S10... \n",
+ "5 170802_UNC31-K00269_0072_AHK3GVBBXX_TTAGGC_S8_... \n",
+ "6 170818_UNC32-K00270_0050_AHL2FHBBXX_GTCCGC_S10... \n",
+ "7 170818_UNC32-K00270_0050_AHL2FHBBXX_GTGAAA_S12... \n",
+ "8 170802_UNC31-K00269_0072_AHK3GVBBXX_CAGATC_S1_... \n",
+ "9 170802_UNC31-K00269_0072_AHK3GVBBXX_ACTTGA_S4_... \n",
+ "\n",
+ " RNAseq_R2_UUID miRNAseq_sample_type \\\n",
+ "0 8c3fe9b7-7acd-4867-8d9c-a8e5d1516eda Tumor \n",
+ "1 15235b12-b67a-4678-acc4-ed03d642bd5e Tumor \n",
+ "2 423b6b09-02aa-4f47-9241-f75c1dad1161 Tumor \n",
+ "3 c83987a5-1c13-4af4-b46c-218fe5f60c34 NaN \n",
+ "4 06d3fd4a-a623-4146-8500-4f1f17235253 Tumor \n",
+ "5 23be22ae-de50-4d74-a7c0-c890adbc662a NaN \n",
+ "6 4e1ad404-4646-4828-91b9-e3c35a4ce505 NaN \n",
+ "7 2bea607d-6eb2-4583-90d7-7823a3d8a572 NaN \n",
+ "8 90ced367-0342-4739-93b2-4b1a4af800c4 Tumor \n",
+ "9 5d0a26e0-2739-4f38-9350-c685b44911d3 Tumor \n",
+ "\n",
+ " miRNAseq_UUID Methylation_available \\\n",
+ "0 37bcba98-1094-459e-83ae-c23a602416fb YES \n",
+ "1 492b50d8-ec35-46e7-a65d-06512aaee394 YES \n",
+ "2 1794ff56-db2d-4d1a-8758-cab7fe3d98c1 YES \n",
+ "3 NaN YES \n",
+ "4 a6524c2d-d7dd-4629-980e-b45dbdc92c49 YES \n",
+ "5 NaN YES \n",
+ "6 NaN YES \n",
+ "7 NaN YES \n",
+ "8 a02b2784-9e7f-41b1-8e53-707ae4371c45 YES \n",
+ "9 872be4b7-1735-48a6-a3a2-7541ec65ea87 YES \n",
+ "\n",
+ " Methylation_quality \n",
+ "0 PASS \n",
+ "1 PASS \n",
+ "2 PASS \n",
+ "3 PASS \n",
+ "4 PASS \n",
+ "5 PASS \n",
+ "6 PASS \n",
+ "7 PASS \n",
+ "8 PASS \n",
+ "9 PASS \n",
+ "\n",
+ "[10 rows x 179 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_source = pd.read_csv(\"./datasets/Dou-ucec-discovery.csv\")\n",
+ "# column_names = [\n",
+ "# \"Country\",\n",
+ "# \"Gender\",\n",
+ "# \"FIGO_stage\",\n",
+ "# \"Path_Stage_Reg_Lymph_Nodes-pN\",\n",
+ "# \"tumor_Stage-Pathological\",\n",
+ "# \"Tumor_Focality\",\n",
+ "# ]\n",
+ "# df_source = df_source[column_names]\n",
+ "df_source.head(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our goal is to harmonize the data from our source table (`Dou-ucec-discovery.csv`) with the data from our target table (`Dou-ucec-confirmatory.csv`)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Idx | \n",
+ " Case_id | \n",
+ " Case_excluded | \n",
+ " Batch | \n",
+ " Plex | \n",
+ " ReporterName | \n",
+ " Aliquot_ID | \n",
+ " Group | \n",
+ " Discovery_study | \n",
+ " Age | \n",
+ " ... | \n",
+ " Follow-up_additional_surgery_for_new_tumor | \n",
+ " Follow-up_additional_treatment_radiation_therapy_for_new_tumor | \n",
+ " Follow-up_additional_treatment_pharmaceutical_therapy_for_new_tumor | \n",
+ " Follow-up_additional_treatment_immuno_for_new_tumor | \n",
+ " Follow-up_days_from_date_of_collection_to_date_of_last_contact | \n",
+ " Follow-up_cause_of_death | \n",
+ " Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_death | \n",
+ " Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor | \n",
+ " Follow-up_procedure_type_of_new_tumor | \n",
+ " Follow-up_residual_tumor_after_surgery_for_new_tumor | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C3L-00086 | \n",
+ " C3L-00086 | \n",
+ " No | \n",
+ " b4 | \n",
+ " 16.0 | \n",
+ " 128N | \n",
+ " CPT0092460003 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " 56 | \n",
+ " ... | \n",
+ " n/a|No|No|No|No | \n",
+ " n/a|Yes|Yes|Yes|Yes | \n",
+ " n/a|Yes|Yes|Yes|Yes | \n",
+ " n/a|No|No|No|No | \n",
+ " 330.0|701.0|1046.0|1436.0|n/a | \n",
+ " n/a|n/a|n/a|n/a|Breast Carcinoma | \n",
+ " n/a|n/a|n/a|n/a|1578.0 | \n",
+ " n/a|n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a|n/a | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " C3L-00898 | \n",
+ " C3L-00898 | \n",
+ " No | \n",
+ " b4 | \n",
+ " 14.0 | \n",
+ " 128C | \n",
+ " CPT0172200008 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " 54 | \n",
+ " ... | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " 396.0|746.0|982.0|1600.0 | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C3L-00943 | \n",
+ " C3L-00943 | \n",
+ " No | \n",
+ " b4 | \n",
+ " 15.0 | \n",
+ " 130C | \n",
+ " CPT0086090003 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " 63 | \n",
+ " ... | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " 237.0|693.0|1039.0 | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " C3L-01064 | \n",
+ " C3L-01064 | \n",
+ " No | \n",
+ " b3 | \n",
+ " 9.0 | \n",
+ " 129N | \n",
+ " CPT0113430004 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " 54 | \n",
+ " ... | \n",
+ " No|No|No|No | \n",
+ " No|Yes|No|No | \n",
+ " Yes|Yes|Yes|Yes | \n",
+ " No|No|No|No | \n",
+ " 453.0|726.0|1062.0|1447.0 | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a|n/a | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " C3L-01277 | \n",
+ " C3L-01277 | \n",
+ " No | \n",
+ " b4 | \n",
+ " 13.0 | \n",
+ " 130N | \n",
+ " CPT0093170003 | \n",
+ " Tumor | \n",
+ " No | \n",
+ " 61 | \n",
+ " ... | \n",
+ " n/a|No|No | \n",
+ " n/a|No|Yes | \n",
+ " n/a|Yes|No | \n",
+ " n/a|No|No | \n",
+ " 351.0|713.0|967.0 | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ " n/a|n/a|n/a | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 213 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Idx Case_id Case_excluded Batch Plex ReporterName Aliquot_ID \\\n",
+ "0 C3L-00086 C3L-00086 No b4 16.0 128N CPT0092460003 \n",
+ "1 C3L-00898 C3L-00898 No b4 14.0 128C CPT0172200008 \n",
+ "2 C3L-00943 C3L-00943 No b4 15.0 130C CPT0086090003 \n",
+ "3 C3L-01064 C3L-01064 No b3 9.0 129N CPT0113430004 \n",
+ "4 C3L-01277 C3L-01277 No b4 13.0 130N CPT0093170003 \n",
+ "\n",
+ " Group Discovery_study Age ... Follow-up_additional_surgery_for_new_tumor \\\n",
+ "0 Tumor No 56 ... n/a|No|No|No|No \n",
+ "1 Tumor No 54 ... n/a|n/a|n/a|n/a \n",
+ "2 Tumor No 63 ... n/a|n/a|n/a \n",
+ "3 Tumor No 54 ... No|No|No|No \n",
+ "4 Tumor No 61 ... n/a|No|No \n",
+ "\n",
+ " Follow-up_additional_treatment_radiation_therapy_for_new_tumor \\\n",
+ "0 n/a|Yes|Yes|Yes|Yes \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 No|Yes|No|No \n",
+ "4 n/a|No|Yes \n",
+ "\n",
+ " Follow-up_additional_treatment_pharmaceutical_therapy_for_new_tumor \\\n",
+ "0 n/a|Yes|Yes|Yes|Yes \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 Yes|Yes|Yes|Yes \n",
+ "4 n/a|Yes|No \n",
+ "\n",
+ " Follow-up_additional_treatment_immuno_for_new_tumor \\\n",
+ "0 n/a|No|No|No|No \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 No|No|No|No \n",
+ "4 n/a|No|No \n",
+ "\n",
+ " Follow-up_days_from_date_of_collection_to_date_of_last_contact \\\n",
+ "0 330.0|701.0|1046.0|1436.0|n/a \n",
+ "1 396.0|746.0|982.0|1600.0 \n",
+ "2 237.0|693.0|1039.0 \n",
+ "3 453.0|726.0|1062.0|1447.0 \n",
+ "4 351.0|713.0|967.0 \n",
+ "\n",
+ " Follow-up_cause_of_death \\\n",
+ "0 n/a|n/a|n/a|n/a|Breast Carcinoma \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 n/a|n/a|n/a|n/a \n",
+ "4 n/a|n/a|n/a \n",
+ "\n",
+ " Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_death \\\n",
+ "0 n/a|n/a|n/a|n/a|1578.0 \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 n/a|n/a|n/a|n/a \n",
+ "4 n/a|n/a|n/a \n",
+ "\n",
+ " Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor \\\n",
+ "0 n/a|n/a|n/a|n/a|n/a \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 n/a|n/a|n/a|n/a \n",
+ "4 n/a|n/a|n/a \n",
+ "\n",
+ " Follow-up_procedure_type_of_new_tumor \\\n",
+ "0 n/a|n/a|n/a|n/a|n/a \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 n/a|n/a|n/a|n/a \n",
+ "4 n/a|n/a|n/a \n",
+ "\n",
+ " Follow-up_residual_tumor_after_surgery_for_new_tumor \n",
+ "0 n/a|n/a|n/a|n/a|n/a \n",
+ "1 n/a|n/a|n/a|n/a \n",
+ "2 n/a|n/a|n/a \n",
+ "3 n/a|n/a|n/a|n/a \n",
+ "4 n/a|n/a|n/a \n",
+ "\n",
+ "[5 rows x 213 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_target = pd.read_csv(\"./datasets/Dou-ucec-confirmatory.csv\")\n",
+ "df_target.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Finding column matches between two tables\n",
+ "\n",
+ "`bdi-kit` offers a suite of functions to help with data harmonization tasks.\n",
+ "\n",
+ "For instance, it can help automatically discover one-to-one mappings between the source and target dataset columns.\n",
+ "\n",
+ "To do so, we can call the `match_schema()` function to match the columns of the two schemas as follows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 179 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fa1a83a2181547f49d962d9750c579ac",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/179 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 213 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "25820a1b76c64c9db6127f064cd504e8",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/213 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " idx | \n",
+ " xCell_T_cell_CD4+_Th1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Idx | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Case_excluded | \n",
+ " Case_excluded | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Proteomics_TMT_batch | \n",
+ " ABSOLUTE_tumor_purity | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Proteomics_TMT_plex | \n",
+ " Number_of_para-aortic_lymph_nodes_examined | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 174 | \n",
+ " RNAseq_R2_UUID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 175 | \n",
+ " miRNAseq_sample_type | \n",
+ " Mutation_signature_SBS7a | \n",
+ "
\n",
+ " \n",
+ " 176 | \n",
+ " miRNAseq_UUID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 177 | \n",
+ " Methylation_available | \n",
+ " Mutation_signature_SBS42 | \n",
+ "
\n",
+ " \n",
+ " 178 | \n",
+ " Methylation_quality | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
179 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 idx xCell_T_cell_CD4+_Th1\n",
+ "1 Proteomics_Participant_ID Idx\n",
+ "2 Case_excluded Case_excluded\n",
+ "3 Proteomics_TMT_batch ABSOLUTE_tumor_purity\n",
+ "4 Proteomics_TMT_plex Number_of_para-aortic_lymph_nodes_examined\n",
+ ".. ... ...\n",
+ "174 RNAseq_R2_UUID Case_id\n",
+ "175 miRNAseq_sample_type Mutation_signature_SBS7a\n",
+ "176 miRNAseq_UUID Case_id\n",
+ "177 Methylation_available Mutation_signature_SBS42\n",
+ "178 Methylation_quality MSI_status\n",
+ "\n",
+ "[179 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_schema(df_source, df_target, method=\"ct_learning\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 179 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0cd8ea67b72649aa9df0b1a3fcbafd8e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/179 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 213 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ab8e9932498b44918c60ef709c151d16",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/213 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " idx | \n",
+ " xCell_T_cell_CD4+_Th1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Proteomics_Participant_ID | \n",
+ " Idx | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Case_excluded | \n",
+ " Case_excluded | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Proteomics_TMT_batch | \n",
+ " ABSOLUTE_tumor_purity | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Proteomics_TMT_plex | \n",
+ " Number_of_para-aortic_lymph_nodes_examined | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Proteomics_TMT_channel | \n",
+ " ReporterName | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Proteomics_Parent_Sample_IDs | \n",
+ " Idx | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Proteomics_Aliquot_ID | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Proteomics_Tumor_Normal | \n",
+ " Group | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Proteomics_OCT | \n",
+ " POLE | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Country | \n",
+ " Participant_country | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Histologic_Grade_FIGO | \n",
+ " Histologic_grade | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Myometrial_invasion_Specify | \n",
+ " Myometrial_invasion_present_specify | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Histologic_type | \n",
+ " Histologic_Type | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Treatment_naive | \n",
+ " Follow-up_additional_treatment_radiation_thera... | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Tumor_purity | \n",
+ " Tumor_necrosis | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Clin_Stage_Dist_Mets-cM | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Path_Stage_Dist_Mets-pM | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " tumor_Stage-Pathological | \n",
+ " Tumor_stage_pathological | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " FIGO_stage | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " LVSI | \n",
+ " Number_of_pelvic_lymph_nodes_examined | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " BMI | \n",
+ " BMI | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Age | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Diabetes | \n",
+ " Diabetes | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Race | \n",
+ " Race | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Ethnicity | \n",
+ " Ethnicity | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Gender | \n",
+ " Sex | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Tumor_Site | \n",
+ " Tumor_site | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Tumor_Site_Other | \n",
+ " Tumor_site_other | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Tumor_Focality | \n",
+ " Tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Tumor_Size_cm | \n",
+ " Tumor_size_cm | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Estrogen_Receptor | \n",
+ " Ancillary_studies_estrogen_receptor | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Estrogen_Receptor_% | \n",
+ " Ancillary_studies_estrogen_receptor | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " Progesterone_Receptor | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Progesterone_Receptor_% | \n",
+ " Ancillary_studies_progesterone_receptor | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " MLH1 | \n",
+ " Ancillary_studies_mlh1 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " MLH2 | \n",
+ " Ancillary_studies_mlh1 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " MSH6 | \n",
+ " Ancillary_studies_msh2 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " PMS2 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " p53 | \n",
+ " Ancillary_studies_p53 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " Other_IHC_specify | \n",
+ " Ancillary_studies_other_immunohistochemistry_t... | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " MLH1_Promoter_Hypermethylation | \n",
+ " Ancillary_studies_mlh1_promoter_hypermethylation | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " Num_full_term_pregnancies | \n",
+ " Donor_information_number_of_full_term_pregnancies | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " EPIC_Bcells | \n",
+ " Progeny_JAK.STAT | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " EPIC_CAFs | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " EPIC_CD4_Tcells | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " EPIC_CD8_Tcells | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " EPIC_Endothelial | \n",
+ " Progeny_PI3K | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " EPIC_Macrophages | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " EPIC_NKcells | \n",
+ " xCell_NK_cell | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " EPIC_otherCells | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " CIBERSORT_B _cells _naive | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " CIBERSORT_B _cells _memory | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " CIBERSORT_Plasma _cells | \n",
+ " Progeny_PI3K | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " CIBERSORT_T _cells _CD8 | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " CIBERSORT_T _cells _CD4 _naive | \n",
+ " Cibersort_T_cell_CD4+_naive | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " CIBERSORT_T _cells _CD4 _memory _resting | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " CIBERSORT_T _cells _CD4 _memory _activated | \n",
+ " xCell_T_cell_CD4+_Th2 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " CIBERSORT_T _cells _follicular _helper | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " CIBERSORT_T _cells _regulatory _(Tregs) | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " CIBERSORT_T _cells _gamma _delta | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " CIBERSORT_NK _cells _resting | \n",
+ " Cibersort_NK_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " CIBERSORT_NK _cells _activated | \n",
+ " Cibersort_NK_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " CIBERSORT_Monocytes | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " CIBERSORT_Macrophages _M0 | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " CIBERSORT_Macrophages _M1 | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " CIBERSORT_Macrophages _M2 | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " CIBERSORT_Dendritic _cells _resting | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " CIBERSORT_Dendritic _cells _activated | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 71 | \n",
+ " CIBERSORT_Mast _cells _resting | \n",
+ " Cibersort_Mast_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " CIBERSORT_Mast _cells _activated | \n",
+ " Cibersort_Mast_cell_activated | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " CIBERSORT_Eosinophils | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " CIBERSORT_Neutrophils | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " CIBERSORT_Absolute _score | \n",
+ " Cibersort_T_cell_follicular_helper | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " ESTIMATE_StromalScore | \n",
+ " Estimate_StromalScore | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " ESTIMATE_ImmuneScore | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " ESTIMATE_ESTIMATEScore | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " Stemness_score | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " ER_ESR1 | \n",
+ " xCell_stroma_score | \n",
+ "
\n",
+ " \n",
+ " 81 | \n",
+ " PR_PGR | \n",
+ " Progeny_Trail | \n",
+ "
\n",
+ " \n",
+ " 82 | \n",
+ " Pathway_activity_EGFR | \n",
+ " Progeny_Trail | \n",
+ "
\n",
+ " \n",
+ " 83 | \n",
+ " Pathway_activity_Hypoxia | \n",
+ " xCell_stroma_score | \n",
+ "
\n",
+ " \n",
+ " 84 | \n",
+ " Pathway_activity_JAK.STAT | \n",
+ " Mutation_signature_SBS1 | \n",
+ "
\n",
+ " \n",
+ " 85 | \n",
+ " Pathway_activity_MAPK | \n",
+ " BMI | \n",
+ "
\n",
+ " \n",
+ " 86 | \n",
+ " Pathway_activity_NFkB | \n",
+ " Progeny_NFkB | \n",
+ "
\n",
+ " \n",
+ " 87 | \n",
+ " Pathway_activity_PI3K | \n",
+ " Pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " Pathway_activity_TGFb | \n",
+ " ABSOLUTE_tumor_purity | \n",
+ "
\n",
+ " \n",
+ " 89 | \n",
+ " Pathway_activity_TNFa | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 90 | \n",
+ " Pathway_activity_Trail | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 91 | \n",
+ " Pathway_activity_VEGF | \n",
+ " Progeny_VEGF | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " Pathway_activity_p53 | \n",
+ " xCell_stroma_score | \n",
+ "
\n",
+ " \n",
+ " 93 | \n",
+ " TP53_ATM | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 94 | \n",
+ " TP53_CHEK2 | \n",
+ " Cibersort_T_cell_CD4+_naive | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " TP53_MDM4 | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " TP53_RPS6KA3 | \n",
+ " Ancillary_studies_pms2 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " TP53_TP53 | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " TP53_pathway | \n",
+ " TP53 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " PI3K_AKT1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " PI3K_AKT2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " PI3K_AKT3 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " PI3K_DEPDC5 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " PI3K_DEPTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 104 | \n",
+ " PI3K_INPP4B | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 105 | \n",
+ " PI3K_MAPKAP1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 106 | \n",
+ " PI3K_MLST8 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 107 | \n",
+ " PI3K_MTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 108 | \n",
+ " PI3K_NPRL2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 109 | \n",
+ " PI3K_NPRL3 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 110 | \n",
+ " PI3K_PDK1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 111 | \n",
+ " PI3K_PIK3CA | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 112 | \n",
+ " PI3K_PIK3CB | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 113 | \n",
+ " PI3K_PIK3R1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 114 | \n",
+ " PI3K_PIK3R2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 115 | \n",
+ " PI3K_PPP2R1A | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 116 | \n",
+ " PI3K_PTEN | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 117 | \n",
+ " PI3K_RHEB | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 118 | \n",
+ " PI3K_RICTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 119 | \n",
+ " PI3K_RPS6 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 120 | \n",
+ " PI3K_RPS6KB1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 121 | \n",
+ " PI3K_RPTOR | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 122 | \n",
+ " PI3K_STK11 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 123 | \n",
+ " PI3K_TSC1 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 124 | \n",
+ " PI3K_TSC2 | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 125 | \n",
+ " PI3K_pathway | \n",
+ " PIK3CA | \n",
+ "
\n",
+ " \n",
+ " 126 | \n",
+ " HRD_BRCA1 | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 127 | \n",
+ " HRD_BRCA2 | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 128 | \n",
+ " HRD_BRCA1_or_BRCA2 | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 129 | \n",
+ " CNV_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 130 | \n",
+ " CNV_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 131 | \n",
+ " CNV_class | \n",
+ " CNV_status | \n",
+ "
\n",
+ " \n",
+ " 132 | \n",
+ " CNV_idx | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 133 | \n",
+ " CNV_1q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 134 | \n",
+ " CNV_3q_DEL | \n",
+ " Idx | \n",
+ "
\n",
+ " \n",
+ " 135 | \n",
+ " CNV_4q_DEL | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " CNV_1q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 137 | \n",
+ " CNV_3q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 138 | \n",
+ " CNV_4q_AMP | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 139 | \n",
+ " Purity_Immune | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 140 | \n",
+ " Purity_Cancer | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 141 | \n",
+ " Purity_Stroma | \n",
+ " Progeny_Androgen | \n",
+ "
\n",
+ " \n",
+ " 142 | \n",
+ " MSI_status | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ " 143 | \n",
+ " POLE_subtype | \n",
+ " POLE | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " JAK1_MS_INDEL | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ " 145 | \n",
+ " JAK1_Mutation | \n",
+ " BMI | \n",
+ "
\n",
+ " \n",
+ " 146 | \n",
+ " Log2_variant_per_Mbp | \n",
+ " Progeny_MAPK | \n",
+ "
\n",
+ " \n",
+ " 147 | \n",
+ " Log2_SNP_per_Mbp | \n",
+ " Progeny_JAK.STAT | \n",
+ "
\n",
+ " \n",
+ " 148 | \n",
+ " Log2_INDEL_per_Mbp | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ "
\n",
+ " \n",
+ " 149 | \n",
+ " Log2_variant_total | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 150 | \n",
+ " Log2_SNP_total | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 151 | \n",
+ " Log2_INDEL_total | \n",
+ " Estimate_ESTIMATEScore | \n",
+ "
\n",
+ " \n",
+ " 152 | \n",
+ " Genomics_subtype | \n",
+ " Genomic_subtype | \n",
+ "
\n",
+ " \n",
+ " 153 | \n",
+ " Mutation_signature_C>A | \n",
+ " CNV_ratio | \n",
+ "
\n",
+ " \n",
+ " 154 | \n",
+ " Mutation_signature_C>G | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 155 | \n",
+ " Mutation_signature_C>T | \n",
+ " xCell_Cancer_associated_fibroblast | \n",
+ "
\n",
+ " \n",
+ " 156 | \n",
+ " Mutation_signature_T>C | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ "
\n",
+ " \n",
+ " 157 | \n",
+ " Mutation_signature_T>A | \n",
+ " Progeny_Hypoxia | \n",
+ "
\n",
+ " \n",
+ " 158 | \n",
+ " Mutation_signature_T>G | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 159 | \n",
+ " WXS_normal_sample_type | \n",
+ " Batch | \n",
+ "
\n",
+ " \n",
+ " 160 | \n",
+ " WXS_normal_filename | \n",
+ " Progeny_WNT | \n",
+ "
\n",
+ " \n",
+ " 161 | \n",
+ " WXS_normal_UUID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 162 | \n",
+ " WXS_tumor_sample_type | \n",
+ " Tumor_site | \n",
+ "
\n",
+ " \n",
+ " 163 | \n",
+ " WXS_tumor_filename | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 164 | \n",
+ " WXS_tumor_UUID | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 165 | \n",
+ " WGS_normal_sample_type | \n",
+ " Mutation_signature_SBS7a | \n",
+ "
\n",
+ " \n",
+ " 166 | \n",
+ " WGS_normal_UUID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 167 | \n",
+ " WGS_tumor_sample_type | \n",
+ " Tumor_site | \n",
+ "
\n",
+ " \n",
+ " 168 | \n",
+ " WGS_tumor_UUID | \n",
+ " Progeny_TGFb | \n",
+ "
\n",
+ " \n",
+ " 169 | \n",
+ " RNAseq_R1_sample_type | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 170 | \n",
+ " RNAseq_R1_filename | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 171 | \n",
+ " RNAseq_R1_UUID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 172 | \n",
+ " RNAseq_R2_sample_type | \n",
+ " ARID1A | \n",
+ "
\n",
+ " \n",
+ " 173 | \n",
+ " RNAseq_R2_filename | \n",
+ " Aliquot_ID | \n",
+ "
\n",
+ " \n",
+ " 174 | \n",
+ " RNAseq_R2_UUID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 175 | \n",
+ " miRNAseq_sample_type | \n",
+ " Mutation_signature_SBS7a | \n",
+ "
\n",
+ " \n",
+ " 176 | \n",
+ " miRNAseq_UUID | \n",
+ " Case_id | \n",
+ "
\n",
+ " \n",
+ " 177 | \n",
+ " Methylation_available | \n",
+ " Mutation_signature_SBS42 | \n",
+ "
\n",
+ " \n",
+ " 178 | \n",
+ " Methylation_quality | \n",
+ " MSI_status | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source \\\n",
+ "0 idx \n",
+ "1 Proteomics_Participant_ID \n",
+ "2 Case_excluded \n",
+ "3 Proteomics_TMT_batch \n",
+ "4 Proteomics_TMT_plex \n",
+ "5 Proteomics_TMT_channel \n",
+ "6 Proteomics_Parent_Sample_IDs \n",
+ "7 Proteomics_Aliquot_ID \n",
+ "8 Proteomics_Tumor_Normal \n",
+ "9 Proteomics_OCT \n",
+ "10 Country \n",
+ "11 Histologic_Grade_FIGO \n",
+ "12 Myometrial_invasion_Specify \n",
+ "13 Histologic_type \n",
+ "14 Treatment_naive \n",
+ "15 Tumor_purity \n",
+ "16 Path_Stage_Primary_Tumor-pT \n",
+ "17 Path_Stage_Reg_Lymph_Nodes-pN \n",
+ "18 Clin_Stage_Dist_Mets-cM \n",
+ "19 Path_Stage_Dist_Mets-pM \n",
+ "20 tumor_Stage-Pathological \n",
+ "21 FIGO_stage \n",
+ "22 LVSI \n",
+ "23 BMI \n",
+ "24 Age \n",
+ "25 Diabetes \n",
+ "26 Race \n",
+ "27 Ethnicity \n",
+ "28 Gender \n",
+ "29 Tumor_Site \n",
+ "30 Tumor_Site_Other \n",
+ "31 Tumor_Focality \n",
+ "32 Tumor_Size_cm \n",
+ "33 Estrogen_Receptor \n",
+ "34 Estrogen_Receptor_% \n",
+ "35 Progesterone_Receptor \n",
+ "36 Progesterone_Receptor_% \n",
+ "37 MLH1 \n",
+ "38 MLH2 \n",
+ "39 MSH6 \n",
+ "40 PMS2 \n",
+ "41 p53 \n",
+ "42 Other_IHC_specify \n",
+ "43 MLH1_Promoter_Hypermethylation \n",
+ "44 Num_full_term_pregnancies \n",
+ "45 EPIC_Bcells \n",
+ "46 EPIC_CAFs \n",
+ "47 EPIC_CD4_Tcells \n",
+ "48 EPIC_CD8_Tcells \n",
+ "49 EPIC_Endothelial \n",
+ "50 EPIC_Macrophages \n",
+ "51 EPIC_NKcells \n",
+ "52 EPIC_otherCells \n",
+ "53 CIBERSORT_B _cells _naive \n",
+ "54 CIBERSORT_B _cells _memory \n",
+ "55 CIBERSORT_Plasma _cells \n",
+ "56 CIBERSORT_T _cells _CD8 \n",
+ "57 CIBERSORT_T _cells _CD4 _naive \n",
+ "58 CIBERSORT_T _cells _CD4 _memory _resting \n",
+ "59 CIBERSORT_T _cells _CD4 _memory _activated \n",
+ "60 CIBERSORT_T _cells _follicular _helper \n",
+ "61 CIBERSORT_T _cells _regulatory _(Tregs) \n",
+ "62 CIBERSORT_T _cells _gamma _delta \n",
+ "63 CIBERSORT_NK _cells _resting \n",
+ "64 CIBERSORT_NK _cells _activated \n",
+ "65 CIBERSORT_Monocytes \n",
+ "66 CIBERSORT_Macrophages _M0 \n",
+ "67 CIBERSORT_Macrophages _M1 \n",
+ "68 CIBERSORT_Macrophages _M2 \n",
+ "69 CIBERSORT_Dendritic _cells _resting \n",
+ "70 CIBERSORT_Dendritic _cells _activated \n",
+ "71 CIBERSORT_Mast _cells _resting \n",
+ "72 CIBERSORT_Mast _cells _activated \n",
+ "73 CIBERSORT_Eosinophils \n",
+ "74 CIBERSORT_Neutrophils \n",
+ "75 CIBERSORT_Absolute _score \n",
+ "76 ESTIMATE_StromalScore \n",
+ "77 ESTIMATE_ImmuneScore \n",
+ "78 ESTIMATE_ESTIMATEScore \n",
+ "79 Stemness_score \n",
+ "80 ER_ESR1 \n",
+ "81 PR_PGR \n",
+ "82 Pathway_activity_EGFR \n",
+ "83 Pathway_activity_Hypoxia \n",
+ "84 Pathway_activity_JAK.STAT \n",
+ "85 Pathway_activity_MAPK \n",
+ "86 Pathway_activity_NFkB \n",
+ "87 Pathway_activity_PI3K \n",
+ "88 Pathway_activity_TGFb \n",
+ "89 Pathway_activity_TNFa \n",
+ "90 Pathway_activity_Trail \n",
+ "91 Pathway_activity_VEGF \n",
+ "92 Pathway_activity_p53 \n",
+ "93 TP53_ATM \n",
+ "94 TP53_CHEK2 \n",
+ "95 TP53_MDM4 \n",
+ "96 TP53_RPS6KA3 \n",
+ "97 TP53_TP53 \n",
+ "98 TP53_pathway \n",
+ "99 PI3K_AKT1 \n",
+ "100 PI3K_AKT2 \n",
+ "101 PI3K_AKT3 \n",
+ "102 PI3K_DEPDC5 \n",
+ "103 PI3K_DEPTOR \n",
+ "104 PI3K_INPP4B \n",
+ "105 PI3K_MAPKAP1 \n",
+ "106 PI3K_MLST8 \n",
+ "107 PI3K_MTOR \n",
+ "108 PI3K_NPRL2 \n",
+ "109 PI3K_NPRL3 \n",
+ "110 PI3K_PDK1 \n",
+ "111 PI3K_PIK3CA \n",
+ "112 PI3K_PIK3CB \n",
+ "113 PI3K_PIK3R1 \n",
+ "114 PI3K_PIK3R2 \n",
+ "115 PI3K_PPP2R1A \n",
+ "116 PI3K_PTEN \n",
+ "117 PI3K_RHEB \n",
+ "118 PI3K_RICTOR \n",
+ "119 PI3K_RPS6 \n",
+ "120 PI3K_RPS6KB1 \n",
+ "121 PI3K_RPTOR \n",
+ "122 PI3K_STK11 \n",
+ "123 PI3K_TSC1 \n",
+ "124 PI3K_TSC2 \n",
+ "125 PI3K_pathway \n",
+ "126 HRD_BRCA1 \n",
+ "127 HRD_BRCA2 \n",
+ "128 HRD_BRCA1_or_BRCA2 \n",
+ "129 CNV_DEL \n",
+ "130 CNV_AMP \n",
+ "131 CNV_class \n",
+ "132 CNV_idx \n",
+ "133 CNV_1q_DEL \n",
+ "134 CNV_3q_DEL \n",
+ "135 CNV_4q_DEL \n",
+ "136 CNV_1q_AMP \n",
+ "137 CNV_3q_AMP \n",
+ "138 CNV_4q_AMP \n",
+ "139 Purity_Immune \n",
+ "140 Purity_Cancer \n",
+ "141 Purity_Stroma \n",
+ "142 MSI_status \n",
+ "143 POLE_subtype \n",
+ "144 JAK1_MS_INDEL \n",
+ "145 JAK1_Mutation \n",
+ "146 Log2_variant_per_Mbp \n",
+ "147 Log2_SNP_per_Mbp \n",
+ "148 Log2_INDEL_per_Mbp \n",
+ "149 Log2_variant_total \n",
+ "150 Log2_SNP_total \n",
+ "151 Log2_INDEL_total \n",
+ "152 Genomics_subtype \n",
+ "153 Mutation_signature_C>A \n",
+ "154 Mutation_signature_C>G \n",
+ "155 Mutation_signature_C>T \n",
+ "156 Mutation_signature_T>C \n",
+ "157 Mutation_signature_T>A \n",
+ "158 Mutation_signature_T>G \n",
+ "159 WXS_normal_sample_type \n",
+ "160 WXS_normal_filename \n",
+ "161 WXS_normal_UUID \n",
+ "162 WXS_tumor_sample_type \n",
+ "163 WXS_tumor_filename \n",
+ "164 WXS_tumor_UUID \n",
+ "165 WGS_normal_sample_type \n",
+ "166 WGS_normal_UUID \n",
+ "167 WGS_tumor_sample_type \n",
+ "168 WGS_tumor_UUID \n",
+ "169 RNAseq_R1_sample_type \n",
+ "170 RNAseq_R1_filename \n",
+ "171 RNAseq_R1_UUID \n",
+ "172 RNAseq_R2_sample_type \n",
+ "173 RNAseq_R2_filename \n",
+ "174 RNAseq_R2_UUID \n",
+ "175 miRNAseq_sample_type \n",
+ "176 miRNAseq_UUID \n",
+ "177 Methylation_available \n",
+ "178 Methylation_quality \n",
+ "\n",
+ " target \n",
+ "0 xCell_T_cell_CD4+_Th1 \n",
+ "1 Idx \n",
+ "2 Case_excluded \n",
+ "3 ABSOLUTE_tumor_purity \n",
+ "4 Number_of_para-aortic_lymph_nodes_examined \n",
+ "5 ReporterName \n",
+ "6 Idx \n",
+ "7 Aliquot_ID \n",
+ "8 Group \n",
+ "9 POLE \n",
+ "10 Participant_country \n",
+ "11 Histologic_grade \n",
+ "12 Myometrial_invasion_present_specify \n",
+ "13 Histologic_Type \n",
+ "14 Follow-up_additional_treatment_radiation_thera... \n",
+ "15 Tumor_necrosis \n",
+ "16 Pathologic_staging_primary_tumor_pt \n",
+ "17 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "18 Clinical_staging_distant_metastasis_cm \n",
+ "19 Clinical_staging_distant_metastasis_cm \n",
+ "20 Tumor_stage_pathological \n",
+ "21 Pathologic_staging_primary_tumor_pt \n",
+ "22 Number_of_pelvic_lymph_nodes_examined \n",
+ "23 BMI \n",
+ "24 Age \n",
+ "25 Diabetes \n",
+ "26 Race \n",
+ "27 Ethnicity \n",
+ "28 Sex \n",
+ "29 Tumor_site \n",
+ "30 Tumor_site_other \n",
+ "31 Tumor_focality \n",
+ "32 Tumor_size_cm \n",
+ "33 Ancillary_studies_estrogen_receptor \n",
+ "34 Ancillary_studies_estrogen_receptor \n",
+ "35 Ancillary_studies_progesterone_receptor \n",
+ "36 Ancillary_studies_progesterone_receptor \n",
+ "37 Ancillary_studies_mlh1 \n",
+ "38 Ancillary_studies_mlh1 \n",
+ "39 Ancillary_studies_msh2 \n",
+ "40 Ancillary_studies_pms2 \n",
+ "41 Ancillary_studies_p53 \n",
+ "42 Ancillary_studies_other_immunohistochemistry_t... \n",
+ "43 Ancillary_studies_mlh1_promoter_hypermethylation \n",
+ "44 Donor_information_number_of_full_term_pregnancies \n",
+ "45 Progeny_JAK.STAT \n",
+ "46 Aliquot_ID \n",
+ "47 Aliquot_ID \n",
+ "48 xCell_T_cell_CD4+_Th2 \n",
+ "49 Progeny_PI3K \n",
+ "50 Cibersort_Macrophage_M2 \n",
+ "51 xCell_NK_cell \n",
+ "52 xCell_Cancer_associated_fibroblast \n",
+ "53 xCell_Cancer_associated_fibroblast \n",
+ "54 xCell_Cancer_associated_fibroblast \n",
+ "55 Progeny_PI3K \n",
+ "56 xCell_T_cell_CD4+_Th2 \n",
+ "57 Cibersort_T_cell_CD4+_naive \n",
+ "58 CNV_ratio \n",
+ "59 xCell_T_cell_CD4+_Th2 \n",
+ "60 xCell_Cancer_associated_fibroblast \n",
+ "61 Progeny_TGFb \n",
+ "62 Progeny_TGFb \n",
+ "63 Cibersort_NK_cell_activated \n",
+ "64 Cibersort_NK_cell_activated \n",
+ "65 Cibersort_Macrophage_M2 \n",
+ "66 Cibersort_Macrophage_M0 \n",
+ "67 Cibersort_Macrophage_M0 \n",
+ "68 Cibersort_Macrophage_M2 \n",
+ "69 xCell_Cancer_associated_fibroblast \n",
+ "70 xCell_Cancer_associated_fibroblast \n",
+ "71 Cibersort_Mast_cell_activated \n",
+ "72 Cibersort_Mast_cell_activated \n",
+ "73 xCell_Cancer_associated_fibroblast \n",
+ "74 CNV_ratio \n",
+ "75 Cibersort_T_cell_follicular_helper \n",
+ "76 Estimate_StromalScore \n",
+ "77 Estimate_ESTIMATEScore \n",
+ "78 Estimate_ESTIMATEScore \n",
+ "79 Progeny_Androgen \n",
+ "80 xCell_stroma_score \n",
+ "81 Progeny_Trail \n",
+ "82 Progeny_Trail \n",
+ "83 xCell_stroma_score \n",
+ "84 Mutation_signature_SBS1 \n",
+ "85 BMI \n",
+ "86 Progeny_NFkB \n",
+ "87 Pathologic_staging_regional_lymph_nodes_pn \n",
+ "88 ABSOLUTE_tumor_purity \n",
+ "89 Pathologic_staging_primary_tumor_pt \n",
+ "90 Pathologic_staging_primary_tumor_pt \n",
+ "91 Progeny_VEGF \n",
+ "92 xCell_stroma_score \n",
+ "93 TP53 \n",
+ "94 Cibersort_T_cell_CD4+_naive \n",
+ "95 TP53 \n",
+ "96 Ancillary_studies_pms2 \n",
+ "97 TP53 \n",
+ "98 TP53 \n",
+ "99 PIK3CA \n",
+ "100 PIK3CA \n",
+ "101 PIK3CA \n",
+ "102 PIK3CA \n",
+ "103 PIK3CA \n",
+ "104 PIK3CA \n",
+ "105 PIK3CA \n",
+ "106 PIK3CA \n",
+ "107 PIK3CA \n",
+ "108 PIK3CA \n",
+ "109 PIK3CA \n",
+ "110 PIK3CA \n",
+ "111 PIK3CA \n",
+ "112 PIK3CA \n",
+ "113 PIK3CA \n",
+ "114 PIK3CA \n",
+ "115 PIK3CA \n",
+ "116 PIK3CA \n",
+ "117 PIK3CA \n",
+ "118 PIK3CA \n",
+ "119 PIK3CA \n",
+ "120 PIK3CA \n",
+ "121 PIK3CA \n",
+ "122 PIK3CA \n",
+ "123 PIK3CA \n",
+ "124 PIK3CA \n",
+ "125 PIK3CA \n",
+ "126 Batch \n",
+ "127 Batch \n",
+ "128 Batch \n",
+ "129 CNV_ratio \n",
+ "130 CNV_ratio \n",
+ "131 CNV_status \n",
+ "132 CNV_ratio \n",
+ "133 CNV_ratio \n",
+ "134 Idx \n",
+ "135 CNV_ratio \n",
+ "136 CNV_ratio \n",
+ "137 CNV_ratio \n",
+ "138 CNV_ratio \n",
+ "139 Progeny_Androgen \n",
+ "140 Progeny_Androgen \n",
+ "141 Progeny_Androgen \n",
+ "142 MSI_status \n",
+ "143 POLE \n",
+ "144 MSI_status \n",
+ "145 BMI \n",
+ "146 Progeny_MAPK \n",
+ "147 Progeny_JAK.STAT \n",
+ "148 Cibersort_Macrophage_M0 \n",
+ "149 Estimate_ESTIMATEScore \n",
+ "150 CNV_ratio \n",
+ "151 Estimate_ESTIMATEScore \n",
+ "152 Genomic_subtype \n",
+ "153 CNV_ratio \n",
+ "154 Cibersort_Macrophage_M2 \n",
+ "155 xCell_Cancer_associated_fibroblast \n",
+ "156 Cibersort_Macrophage_M2 \n",
+ "157 Progeny_Hypoxia \n",
+ "158 Progeny_TGFb \n",
+ "159 Batch \n",
+ "160 Progeny_WNT \n",
+ "161 Case_id \n",
+ "162 Tumor_site \n",
+ "163 Progeny_TGFb \n",
+ "164 Progeny_TGFb \n",
+ "165 Mutation_signature_SBS7a \n",
+ "166 Case_id \n",
+ "167 Tumor_site \n",
+ "168 Progeny_TGFb \n",
+ "169 ARID1A \n",
+ "170 Aliquot_ID \n",
+ "171 Case_id \n",
+ "172 ARID1A \n",
+ "173 Aliquot_ID \n",
+ "174 Case_id \n",
+ "175 Mutation_signature_SBS7a \n",
+ "176 Case_id \n",
+ "177 Mutation_signature_SBS42 \n",
+ "178 MSI_status "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.set_option('display.max_rows', None)\n",
+ "schema_mapping = bdi.match_schema(df_source, df_target, method=\"ct_learning\")\n",
+ "schema_mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 1 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6ba498c573914394bc4c41cabef033d6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 213 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6b6337e22fd94ddc8fc2c24a775531c1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/213 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Tumor_purity | \n",
+ " Tumor_size_cm | \n",
+ " 0.108482 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Tumor_purity | \n",
+ " ABSOLUTE_tumor_purity | \n",
+ " 0.099089 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS10b | \n",
+ " 0.094293 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_T_cell_regulatory_(Tregs) | \n",
+ " 0.092900 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_Monocyte | \n",
+ " 0.092766 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_B_cell_naive | \n",
+ " 0.092648 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS1 | \n",
+ " 0.092625 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_T_cell_gamma_delta | \n",
+ " 0.091828 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Tumor_purity | \n",
+ " Progeny_TGFb | \n",
+ " 0.090625 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS10a | \n",
+ " 0.090585 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Tumor_purity | \n",
+ " Pathologic_staging_primary_tumor_pt | \n",
+ " 0.089843 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS7a | \n",
+ " 0.089825 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS6 | \n",
+ " 0.089275 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS42 | \n",
+ " 0.089130 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS5 | \n",
+ " 0.088617 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Tumor_purity | \n",
+ " Progeny_TNFa | \n",
+ " 0.088416 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS15 | \n",
+ " 0.088232 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Tumor_purity | \n",
+ " Plex | \n",
+ " 0.087975 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Tumor_purity | \n",
+ " BMI | \n",
+ " 0.087312 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Tumor_purity | \n",
+ " Follow-up_tumor_status_at_date_of_last_contact... | \n",
+ " 0.087292 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_T_cell_CD8+ | \n",
+ " 0.087164 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Tumor_purity | \n",
+ " xCell_Monocyte | \n",
+ " 0.087080 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " Tumor_purity | \n",
+ " xCell_T_cell_gamma_delta | \n",
+ " 0.086970 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Tumor_purity | \n",
+ " xCell_T_cell_CD8+_naive | \n",
+ " 0.086835 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Tumor_purity | \n",
+ " Tumor_site | \n",
+ " 0.086777 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS20 | \n",
+ " 0.086773 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_Mast_cell_resting | \n",
+ " 0.086565 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_B_cell_memory | \n",
+ " 0.086439 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Tumor_purity | \n",
+ " Tumor_necrosis | \n",
+ " 0.086347 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Tumor_purity | \n",
+ " Was_the_participant_exposed_to_secondhand_smoke | \n",
+ " 0.085964 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Tumor_purity | \n",
+ " xCell_T_cell_regulatory_(Tregs) | \n",
+ " 0.085852 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Tumor_purity | \n",
+ " Mutation_signature_SBS21 | \n",
+ " 0.085632 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Tumor_purity | \n",
+ " Cibersort_T_cell_follicular_helper | \n",
+ " 0.085575 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Tumor_purity | \n",
+ " Current_medications_medication_name_vitamins_s... | \n",
+ " 0.085556 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Tumor_purity | \n",
+ " xCell_T_cell_CD4+_naive | \n",
+ " 0.085543 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target \\\n",
+ "0 Tumor_purity Tumor_size_cm \n",
+ "1 Tumor_purity ABSOLUTE_tumor_purity \n",
+ "2 Tumor_purity Mutation_signature_SBS10b \n",
+ "3 Tumor_purity Cibersort_T_cell_regulatory_(Tregs) \n",
+ "4 Tumor_purity Cibersort_Monocyte \n",
+ "5 Tumor_purity Cibersort_B_cell_naive \n",
+ "6 Tumor_purity Mutation_signature_SBS1 \n",
+ "7 Tumor_purity Cibersort_T_cell_gamma_delta \n",
+ "8 Tumor_purity Progeny_TGFb \n",
+ "9 Tumor_purity Mutation_signature_SBS10a \n",
+ "10 Tumor_purity Pathologic_staging_primary_tumor_pt \n",
+ "11 Tumor_purity Mutation_signature_SBS7a \n",
+ "12 Tumor_purity Mutation_signature_SBS6 \n",
+ "13 Tumor_purity Mutation_signature_SBS42 \n",
+ "14 Tumor_purity Mutation_signature_SBS5 \n",
+ "15 Tumor_purity Progeny_TNFa \n",
+ "16 Tumor_purity Mutation_signature_SBS15 \n",
+ "17 Tumor_purity Plex \n",
+ "18 Tumor_purity BMI \n",
+ "19 Tumor_purity Follow-up_tumor_status_at_date_of_last_contact... \n",
+ "20 Tumor_purity Cibersort_T_cell_CD8+ \n",
+ "21 Tumor_purity xCell_Monocyte \n",
+ "22 Tumor_purity xCell_T_cell_gamma_delta \n",
+ "23 Tumor_purity xCell_T_cell_CD8+_naive \n",
+ "24 Tumor_purity Tumor_site \n",
+ "25 Tumor_purity Mutation_signature_SBS20 \n",
+ "26 Tumor_purity Cibersort_Mast_cell_resting \n",
+ "27 Tumor_purity Cibersort_B_cell_memory \n",
+ "28 Tumor_purity Tumor_necrosis \n",
+ "29 Tumor_purity Was_the_participant_exposed_to_secondhand_smoke \n",
+ "30 Tumor_purity xCell_T_cell_regulatory_(Tregs) \n",
+ "31 Tumor_purity Mutation_signature_SBS21 \n",
+ "32 Tumor_purity Cibersort_T_cell_follicular_helper \n",
+ "33 Tumor_purity Current_medications_medication_name_vitamins_s... \n",
+ "34 Tumor_purity xCell_T_cell_CD4+_naive \n",
+ "\n",
+ " similarity \n",
+ "0 0.108482 \n",
+ "1 0.099089 \n",
+ "2 0.094293 \n",
+ "3 0.092900 \n",
+ "4 0.092766 \n",
+ "5 0.092648 \n",
+ "6 0.092625 \n",
+ "7 0.091828 \n",
+ "8 0.090625 \n",
+ "9 0.090585 \n",
+ "10 0.089843 \n",
+ "11 0.089825 \n",
+ "12 0.089275 \n",
+ "13 0.089130 \n",
+ "14 0.088617 \n",
+ "15 0.088416 \n",
+ "16 0.088232 \n",
+ "17 0.087975 \n",
+ "18 0.087312 \n",
+ "19 0.087292 \n",
+ "20 0.087164 \n",
+ "21 0.087080 \n",
+ "22 0.086970 \n",
+ "23 0.086835 \n",
+ "24 0.086777 \n",
+ "25 0.086773 \n",
+ "26 0.086565 \n",
+ "27 0.086439 \n",
+ "28 0.086347 \n",
+ "29 0.085964 \n",
+ "30 0.085852 \n",
+ "31 0.085632 \n",
+ "32 0.085575 \n",
+ "33 0.085556 \n",
+ "34 0.085543 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.top_matches(df_source, columns=['Tumor_purity'], target=df_target, top_k=35)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Surgery|Surgery | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Surgery | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Other(Mohs treatment) | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Radiation,Surgery | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Unknown | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name\n",
+ "0 NaN\n",
+ "1 Surgery|Surgery\n",
+ "2 Surgery\n",
+ "3 Other(Mohs treatment)\n",
+ "4 Radiation,Surgery\n",
+ "5 Unknown"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(df_target, column='Treatment_naive')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "KeyError",
+ "evalue": "'Ancillary_studies_mlh2'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[26], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Stemness_score\tProgeny_Androgen\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mbdi\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmatch_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_source\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdf_target\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMLH2\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mAncillary_studies_mlh2\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtfidf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# bdi.match_values(df_source, df_target, ('Tumor_purity', 'ABSOLUTE_tumor_purity'), method='tfidf')\u001b[39;00m\n",
+ "File \u001b[0;32m~/workspace/askem-arpa-h/bdi-kit/bdikit/api.py:317\u001b[0m, in \u001b[0;36mmatch_values\u001b[0;34m(source, target, column_mapping, method, method_args)\u001b[0m\n\u001b[1;32m 315\u001b[0m method_args \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 316\u001b[0m value_matcher \u001b[38;5;241m=\u001b[39m ValueMatchers\u001b[38;5;241m.\u001b[39mget_instance(method, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmethod_args)\n\u001b[0;32m--> 317\u001b[0m matches \u001b[38;5;241m=\u001b[39m \u001b[43m_match_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_domain\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumn_mapping_list\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue_matcher\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 319\u001b[0m result \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 320\u001b[0m _value_matching_result_to_df(matching_result) \u001b[38;5;28;01mfor\u001b[39;00m matching_result \u001b[38;5;129;01min\u001b[39;00m matches\n\u001b[1;32m 321\u001b[0m ]\n\u001b[1;32m 323\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(column_mapping, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# If only a single mapping is provided (as a tuple), we return the result\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# directly as a DataFrame to make it easier to display it in notebooks.\u001b[39;00m\n",
+ "File \u001b[0;32m~/workspace/askem-arpa-h/bdi-kit/bdikit/api.py:373\u001b[0m, in \u001b[0;36m_match_values\u001b[0;34m(dataset, target_domain, column_mapping, value_matcher)\u001b[0m\n\u001b[1;32m 370\u001b[0m source_column, target_column \u001b[38;5;241m=\u001b[39m mapping[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msource\u001b[39m\u001b[38;5;124m\"\u001b[39m], mapping[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtarget\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 372\u001b[0m \u001b[38;5;66;03m# 1. Select candidate columns for value mapping\u001b[39;00m\n\u001b[0;32m--> 373\u001b[0m target_domain_list \u001b[38;5;241m=\u001b[39m \u001b[43mtarget_domain\u001b[49m\u001b[43m[\u001b[49m\u001b[43mtarget_column\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m target_domain_list \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(target_domain_list) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 375\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'Ancillary_studies_mlh2'"
+ ]
+ }
+ ],
+ "source": [
+ "# Stemness_score\tProgeny_Androgen\n",
+ "bdi.match_values(df_source, df_target, ('MLH2', 'Ancillary_studies_mlh2'), method='tfidf') \n",
+ "# bdi.match_values(df_source, df_target, ('Tumor_purity', 'ABSOLUTE_tumor_purity'), method='tfidf')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2 | \n",
+ " b2 | \n",
+ " 0.578 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4 | \n",
+ " b4 | \n",
+ " 0.578 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " b1 | \n",
+ " 0.578 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " b3 | \n",
+ " 0.578 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 2 b2 0.578\n",
+ "1 4 b4 0.578\n",
+ "2 1 b1 0.578\n",
+ "3 3 b3 0.578\n",
+ "4 5 None NaN"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(df_source, df_target, ('Proteomics_TMT_batch', 'Batch'), method='tfidf')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 1 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8f48aa90b74c4f73856daa6d02be8f16",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 213 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f8c28777ab8e4e48a184e458811702eb",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/213 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " MLH2 | \n",
+ " Ancillary_studies_mlh1 | \n",
+ " 0.103021 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " MLH2 | \n",
+ " BMI | \n",
+ " 0.101541 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " MLH2 | \n",
+ " CNV_status | \n",
+ " 0.100899 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MLH2 | \n",
+ " Mutation_signature_SBS21 | \n",
+ " 0.097867 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MLH2 | \n",
+ " xCell_B_cell | \n",
+ " 0.097845 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " MLH2 | \n",
+ " Mutation_load | \n",
+ " 0.096779 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " MLH2 | \n",
+ " Mutation_signature_SBS20 | \n",
+ " 0.096626 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " MLH2 | \n",
+ " xCell_Macrophage | \n",
+ " 0.096572 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " MLH2 | \n",
+ " xCell_Myeloid_dendritic_cell | \n",
+ " 0.095951 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " MLH2 | \n",
+ " Clinical_staging_distant_metastasis_cm | \n",
+ " 0.095322 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " MLH2 | \n",
+ " Mutation_signature_SBS54 | \n",
+ " 0.094960 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " MLH2 | \n",
+ " xCell_Monocyte | \n",
+ " 0.094659 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " MLH2 | \n",
+ " Mutation_signature_SBS42 | \n",
+ " 0.094600 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " MLH2 | \n",
+ " xCell_Endothelial_cell | \n",
+ " 0.094528 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " MLH2 | \n",
+ " xCell_Class-switched_memory_B_cell | \n",
+ " 0.094443 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " MLH2 | \n",
+ " Cibersort_Macrophage_M2 | \n",
+ " 0.094272 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " MLH2 | \n",
+ " xCell_Mast_cell | \n",
+ " 0.094229 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " MLH2 | \n",
+ " xCell_Macrophage_M1 | \n",
+ " 0.093890 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " MLH2 | \n",
+ " Cibersort_Macrophage_M0 | \n",
+ " 0.093889 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " MLH2 | \n",
+ " Tumor_size_cm | \n",
+ " 0.093696 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " MLH2 | \n",
+ " xCell_Macrophage_M2 | \n",
+ " 0.093364 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " MLH2 | \n",
+ " Case_id | \n",
+ " 0.093306 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " MLH2 | \n",
+ " xCell_NK_cell | \n",
+ " 0.093204 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " MLH2 | \n",
+ " xCell_B_cell_memory | \n",
+ " 0.092884 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " MLH2 | \n",
+ " Mutation_signature_SBS7a | \n",
+ " 0.092399 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " MLH2 | \n",
+ " xCell_Plasmacytoid_dendritic_cell | \n",
+ " 0.091764 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " MLH2 | \n",
+ " xCell_T_cell_CD4+_memory | \n",
+ " 0.091703 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " MLH2 | \n",
+ " xCell_B_cell_naive | \n",
+ " 0.091680 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " MLH2 | \n",
+ " xCell_B_cell_plasma | \n",
+ " 0.091495 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " MLH2 | \n",
+ " MSI_status | \n",
+ " 0.091326 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " MLH2 | \n",
+ " Cibersort_Monocyte | \n",
+ " 0.090943 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " MLH2 | \n",
+ " Idx | \n",
+ " 0.090744 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " MLH2 | \n",
+ " xCell_T_cell_CD4+_naive | \n",
+ " 0.090707 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " MLH2 | \n",
+ " Mutation_signature_SBS10a | \n",
+ " 0.090691 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " MLH2 | \n",
+ " Cibersort_Mast_cell_resting | \n",
+ " 0.090560 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 MLH2 Ancillary_studies_mlh1 0.103021\n",
+ "1 MLH2 BMI 0.101541\n",
+ "2 MLH2 CNV_status 0.100899\n",
+ "3 MLH2 Mutation_signature_SBS21 0.097867\n",
+ "4 MLH2 xCell_B_cell 0.097845\n",
+ "5 MLH2 Mutation_load 0.096779\n",
+ "6 MLH2 Mutation_signature_SBS20 0.096626\n",
+ "7 MLH2 xCell_Macrophage 0.096572\n",
+ "8 MLH2 xCell_Myeloid_dendritic_cell 0.095951\n",
+ "9 MLH2 Clinical_staging_distant_metastasis_cm 0.095322\n",
+ "10 MLH2 Mutation_signature_SBS54 0.094960\n",
+ "11 MLH2 xCell_Monocyte 0.094659\n",
+ "12 MLH2 Mutation_signature_SBS42 0.094600\n",
+ "13 MLH2 xCell_Endothelial_cell 0.094528\n",
+ "14 MLH2 xCell_Class-switched_memory_B_cell 0.094443\n",
+ "15 MLH2 Cibersort_Macrophage_M2 0.094272\n",
+ "16 MLH2 xCell_Mast_cell 0.094229\n",
+ "17 MLH2 xCell_Macrophage_M1 0.093890\n",
+ "18 MLH2 Cibersort_Macrophage_M0 0.093889\n",
+ "19 MLH2 Tumor_size_cm 0.093696\n",
+ "20 MLH2 xCell_Macrophage_M2 0.093364\n",
+ "21 MLH2 Case_id 0.093306\n",
+ "22 MLH2 xCell_NK_cell 0.093204\n",
+ "23 MLH2 xCell_B_cell_memory 0.092884\n",
+ "24 MLH2 Mutation_signature_SBS7a 0.092399\n",
+ "25 MLH2 xCell_Plasmacytoid_dendritic_cell 0.091764\n",
+ "26 MLH2 xCell_T_cell_CD4+_memory 0.091703\n",
+ "27 MLH2 xCell_B_cell_naive 0.091680\n",
+ "28 MLH2 xCell_B_cell_plasma 0.091495\n",
+ "29 MLH2 MSI_status 0.091326\n",
+ "30 MLH2 Cibersort_Monocyte 0.090943\n",
+ "31 MLH2 Idx 0.090744\n",
+ "32 MLH2 xCell_T_cell_CD4+_naive 0.090707\n",
+ "33 MLH2 Mutation_signature_SBS10a 0.090691\n",
+ "34 MLH2 Cibersort_Mast_cell_resting 0.090560"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.top_matches(df_source, columns=['MLH2'], target=df_target, top_k=35)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Finding value matches between two columns\n",
+ "\n",
+ "Once the matching columns are identified, we can standardize the data so that each entity/meaning is represented by a single value rather than by several duplicate variants.\n",
+ "\n",
+ "To do that, `bdikit` provides the function `match_values()` to find values that should potentially be merged. The library supports multiple methods to perform this task, including syntactic and semantic matching algorithms. In this example, we use the `tfidf` method, which matches values based on the similarity of their character n-grams. Please refer to the [bdikit documentation](https://bdi-kit.readthedocs.io/) to learn more about the available methods."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Case_excluded
**Target column:** Case_excluded
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " No | \n",
+ " No | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 No No 1.0\n",
+ "1 Yes Yes 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Proteomics_TMT_batch
**Target column:** ABSOLUTE_tumor_purity
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 0.33 | \n",
+ " 0.622 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 0.40 | \n",
+ " 0.608 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 0.50 | \n",
+ " 0.606 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 0.20 | \n",
+ " 0.601 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1.00 | \n",
+ " 0.506 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 3 0.33 0.622\n",
+ "1 4 0.40 0.608\n",
+ "2 5 0.50 0.606\n",
+ "3 2 0.20 0.601\n",
+ "4 1 1.00 0.506"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Proteomics_TMT_plex
**Target column:** Number_of_para-aortic_lymph_nodes_examined
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 8 | \n",
+ " 8 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 0.626 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 15 | \n",
+ " 5 | \n",
+ " 0.592 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 3 | \n",
+ " 0.592 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 17 | \n",
+ " 7 | \n",
+ " 0.592 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 14 | \n",
+ " 4 | \n",
+ " 0.563 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 9 | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 5 5 1.000\n",
+ "1 16 16 1.000\n",
+ "2 2 2 1.000\n",
+ "3 11 11 1.000\n",
+ "4 12 12 1.000\n",
+ "5 8 8 1.000\n",
+ "6 7 7 1.000\n",
+ "7 6 6 1.000\n",
+ "8 3 3 1.000\n",
+ "9 1 1 1.000\n",
+ "10 4 4 1.000\n",
+ "11 10 0 0.626\n",
+ "12 15 5 0.592\n",
+ "13 13 3 0.592\n",
+ "14 17 7 0.592\n",
+ "15 14 4 0.563\n",
+ "16 9 None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Proteomics_TMT_channel
**Target column:** ReporterName
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 128N | \n",
+ " 128N | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 130N | \n",
+ " 130N | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 129N | \n",
+ " 129N | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 129C | \n",
+ " 129C | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 130C | \n",
+ " 130C | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 127N | \n",
+ " 127N | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 127C | \n",
+ " 127C | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 128C | \n",
+ " 128C | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 131 | \n",
+ " 131N | \n",
+ " 0.714 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 128N 128N 1.000\n",
+ "1 130N 130N 1.000\n",
+ "2 129N 129N 1.000\n",
+ "3 129C 129C 1.000\n",
+ "4 130C 130C 1.000\n",
+ "5 127N 127N 1.000\n",
+ "6 127C 127C 1.000\n",
+ "7 128C 128C 1.000\n",
+ "8 131 131N 0.714"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Proteomics_Tumor_Normal
**Target column:** Group
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Tumor | \n",
+ " Tumor | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Adjacent_normal | \n",
+ " Adjacent_normal | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Enriched_normal | \n",
+ " Enriched_Normal | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Myometrium_normal | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Tumor Tumor 1.0\n",
+ "1 Adjacent_normal Adjacent_normal 1.0\n",
+ "2 Enriched_normal Enriched_Normal 1.0\n",
+ "3 Myometrium_normal None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Proteomics_OCT
**Target column:** POLE
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " No | \n",
+ " No | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 No No 1.0\n",
+ "1 Yes Yes 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Country
**Target column:** Participant_country
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " United States | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " Ukraine | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " Poland | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States United States 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Ukraine Ukraine 1.0\n",
+ "3 Poland Poland 1.0\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Histologic_Grade_FIGO
**Target column:** Histologic_grade
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " FIGO grade 1 | \n",
+ " Other: High grade | \n",
+ " 0.426 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " FIGO grade 2 | \n",
+ " Other: High grade | \n",
+ " 0.426 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " FIGO grade 3 | \n",
+ " Other: High grade | \n",
+ " 0.426 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.000\n",
+ "1 FIGO grade 1 Other: High grade 0.426\n",
+ "2 FIGO grade 2 Other: High grade 0.426\n",
+ "3 FIGO grade 3 Other: High grade 0.426"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Myometrial_invasion_Specify
**Target column:** Myometrial_invasion_present_specify
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 50 % or more | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not identified | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " under 50 % | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 50 % or more NaN NaN\n",
+ "2 Not identified NaN NaN\n",
+ "3 under 50 % NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Histologic_type
**Target column:** Histologic_Type
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Endometrioid carcinoma | \n",
+ " 0.855 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Clear cell carcinoma | \n",
+ " 0.835 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous carcinoma | \n",
+ " 0.717 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Carcinosarcoma | \n",
+ " Serous carcinoma | \n",
+ " 0.618 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.000\n",
+ "1 Endometrioid Endometrioid carcinoma 0.855\n",
+ "2 Clear cell Clear cell carcinoma 0.835\n",
+ "3 Serous Serous carcinoma 0.717\n",
+ "4 Carcinosarcoma Serous carcinoma 0.618"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Treatment_naive
**Target column:** Follow-up_additional_treatment_radiation_therapy_for_new_tumor
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " NO | \n",
+ " No|No | \n",
+ " 0.806 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " YES | \n",
+ " Yes|Yes|Yes|Yes | \n",
+ " 0.787 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.000\n",
+ "1 NO No|No 0.806\n",
+ "2 YES Yes|Yes|Yes|Yes 0.787"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Tumor_purity
**Target column:** Tumor_necrosis
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Normal | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Low | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Normal NaN NaN\n",
+ "2 Low NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Path_Stage_Primary_Tumor-pT
**Target column:** Pathologic_staging_primary_tumor_pt
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT1a (FIGO IA) | \n",
+ " pT1a (FIGO IA) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT1 (FIGO I) | \n",
+ " pT1 (FIGO I) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pT1b (FIGO IB) | \n",
+ " pT1b (FIGO IB) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT2 (FIGO II) | \n",
+ " pT2 (FIGO II) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT1a (FIGO IA) pT1a (FIGO IA) 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 pT3a (FIGO IIIA) pT3a (FIGO IIIA) 1.0\n",
+ "3 pT1 (FIGO I) pT1 (FIGO I) 1.0\n",
+ "4 pT1b (FIGO IB) pT1b (FIGO IB) 1.0\n",
+ "5 pT2 (FIGO II) pT2 (FIGO II) 1.0\n",
+ "6 pT3b (FIGO IIIB) pT3b (FIGO IIIB) 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Path_Stage_Reg_Lymph_Nodes-pN
**Target column:** Pathologic_staging_regional_lymph_nodes_pn
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pN0 | \n",
+ " pN0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pNX | \n",
+ " pNX | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pN2 (FIGO IIIC2) | \n",
+ " pN2 (FIGO IIIC2) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pN1 (FIGO IIIC1) | \n",
+ " pN1 (FIGO IIIC1) | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pN0 pN0 1.0\n",
+ "1 pNX pNX 1.0\n",
+ "2 nan NaN 1.0\n",
+ "3 pN2 (FIGO IIIC2) pN2 (FIGO IIIC2) 1.0\n",
+ "4 pN1 (FIGO IIIC1) pN1 (FIGO IIIC1) 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Clin_Stage_Dist_Mets-cM
**Target column:** Clinical_staging_distant_metastasis_cm
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " cM0 | \n",
+ " cM0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Staging Incomplete | \n",
+ " Staging Incomplete | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " cM1 | \n",
+ " cM1 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 cM0 cM0 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Staging Incomplete Staging Incomplete 1.0\n",
+ "3 cM1 cM1 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Path_Stage_Dist_Mets-pM
**Target column:** Clinical_staging_distant_metastasis_cm
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Staging Incomplete | \n",
+ " Staging Incomplete | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pM1 | \n",
+ " cM1 | \n",
+ " 0.445 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " No pathologic evidence of distant metastasis | \n",
+ " Staging Incomplete | \n",
+ " 0.423 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target \\\n",
+ "0 Staging Incomplete Staging Incomplete \n",
+ "1 nan NaN \n",
+ "2 pM1 cM1 \n",
+ "3 No pathologic evidence of distant metastasis Staging Incomplete \n",
+ "\n",
+ " similarity \n",
+ "0 1.000 \n",
+ "1 1.000 \n",
+ "2 0.445 \n",
+ "3 0.423 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** tumor_Stage-Pathological
**Target column:** Tumor_stage_pathological
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Stage I | \n",
+ " Stage I | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Stage IV | \n",
+ " Stage IV | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Stage III | \n",
+ " Stage III | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Stage II | \n",
+ " Stage II | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Stage I Stage I 1.0\n",
+ "1 Stage IV Stage IV 1.0\n",
+ "2 nan NaN 1.0\n",
+ "3 Stage III Stage III 1.0\n",
+ "4 Stage II Stage II 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** FIGO_stage
**Target column:** Pathologic_staging_primary_tumor_pt
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " IIIB | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " 0.689 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " IIIA | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " 0.682 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " IA | \n",
+ " pT1a [IA] | \n",
+ " 0.567 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " II | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " 0.526 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " IB | \n",
+ " pT1b (FIGO IB) | \n",
+ " 0.498 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " IIIC2 | \n",
+ " pT2 [II] | \n",
+ " 0.403 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " IIIC1 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " 0.344 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " IVB | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.000\n",
+ "1 IIIB pT3b (FIGO IIIB) 0.689\n",
+ "2 IIIA pT3a (FIGO IIIA) 0.682\n",
+ "3 IA pT1a [IA] 0.567\n",
+ "4 II pT3a (FIGO IIIA) 0.526\n",
+ "5 IB pT1b (FIGO IB) 0.498\n",
+ "6 IIIC2 pT2 [II] 0.403\n",
+ "7 IIIC1 pT3a (FIGO IIIA) 0.344\n",
+ "8 IVB None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Diabetes
**Target column:** Diabetes
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " No | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Yes Yes 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Unknown None NaN\n",
+ "3 No None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Race
**Target column:** Race
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " White | \n",
+ " White | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Asian | \n",
+ " Asian | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Not Reported | \n",
+ " Not Reported | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Black or African American | \n",
+ " Black or African American | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 White White 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Asian Asian 1.0\n",
+ "3 Not Reported Not Reported 1.0\n",
+ "4 Black or African American Black or African American 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Ethnicity
**Target column:** Ethnicity
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Not-Hispanic or Latino | \n",
+ " Not-Hispanic or Latino | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Hispanic or Latino | \n",
+ " Hispanic or Latino | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Not reported | \n",
+ " Not reported | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Not-Hispanic or Latino Not-Hispanic or Latino 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Hispanic or Latino Hispanic or Latino 1.0\n",
+ "3 Not reported Not reported 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Gender
**Target column:** Sex
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Female | \n",
+ " Female | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Female Female 1.0\n",
+ "1 nan NaN 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Tumor_Site
**Target column:** Tumor_site
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Anterior endometrium | \n",
+ " Anterior endometrium | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Posterior endometrium | \n",
+ " Posterior endometrium | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Other, specify | \n",
+ " Other | \n",
+ " 0.558 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Anterior endometrium Anterior endometrium 1.000\n",
+ "1 Posterior endometrium Posterior endometrium 1.000\n",
+ "2 nan NaN 1.000\n",
+ "3 Other, specify Other 0.558"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Tumor_Focality
**Target column:** Tumor_focality
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Unifocal | \n",
+ " Unifocal | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Multifocal | \n",
+ " Multifocal | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Unifocal Unifocal 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Multifocal Multifocal 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Estrogen_Receptor
**Target column:** Ancillary_studies_estrogen_receptor
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Negative | \n",
+ " Negative | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Positive | \n",
+ " Positive : 5 % | \n",
+ " 0.941 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Cannot be determined Cannot be determined 1.000\n",
+ "1 nan NaN 1.000\n",
+ "2 Negative Negative 1.000\n",
+ "3 Positive Positive : 5 % 0.941\n",
+ "4 Unknown None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Progesterone_Receptor
**Target column:** Ancillary_studies_progesterone_receptor
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Negative | \n",
+ " Negative | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Positive | \n",
+ " Positive : 5 % | \n",
+ " 0.941 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Cannot be determined Cannot be determined 1.000\n",
+ "1 nan NaN 1.000\n",
+ "2 Negative Negative 1.000\n",
+ "3 Positive Positive : 5 % 0.941\n",
+ "4 Unknown None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** MLH1
**Target column:** Ancillary_studies_mlh1
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Intact nuclear expression | \n",
+ " Intact nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Loss of nuclear expression | \n",
+ " Loss of nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Intact nuclear expression Intact nuclear expression 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Loss of nuclear expression Loss of nuclear expression 1.0\n",
+ "3 Cannot be determined Cannot be determined 1.0\n",
+ "4 Unknown None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** MLH2
**Target column:** Ancillary_studies_mlh1
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Intact nuclear expression | \n",
+ " Intact nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Loss of nuclear expression | \n",
+ " Loss of nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Intact nuclear expression Intact nuclear expression 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Cannot be determined Cannot be determined 1.0\n",
+ "3 Loss of nuclear expression Loss of nuclear expression 1.0\n",
+ "4 Unknown None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** MSH6
**Target column:** Ancillary_studies_msh2
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Loss of nuclear expression | \n",
+ " Loss of nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Intact nuclear expression | \n",
+ " Intact nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Loss of nuclear expression Loss of nuclear expression 1.0\n",
+ "1 Intact nuclear expression Intact nuclear expression 1.0\n",
+ "2 nan NaN 1.0\n",
+ "3 Cannot be determined Cannot be determined 1.0\n",
+ "4 Unknown None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** PMS2
**Target column:** Ancillary_studies_pms2
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Intact nuclear expression | \n",
+ " Intact nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Loss of nuclear expression | \n",
+ " Loss of nuclear expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Intact nuclear expression Intact nuclear expression 1.0\n",
+ "1 Loss of nuclear expression Loss of nuclear expression 1.0\n",
+ "2 nan NaN 1.0\n",
+ "3 Cannot be determined Cannot be determined 1.0\n",
+ "4 Unknown None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** p53
**Target column:** Ancillary_studies_p53
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Normal | \n",
+ " Normal | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Overexpression | \n",
+ " Overexpression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Loss of expression | \n",
+ " Loss of expression | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Cannot be determined Cannot be determined 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Normal Normal 1.0\n",
+ "3 Overexpression Overexpression 1.0\n",
+ "4 Loss of expression Loss of expression 1.0\n",
+ "5 Unknown None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** MLH1_Promoter_Hypermethylation
**Target column:** Ancillary_studies_mlh1_promoter_hypermethylation
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Cannot be determined | \n",
+ " Cannot be determined | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Absent | \n",
+ " Absent | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Present | \n",
+ " Present | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Cannot be determined Cannot be determined 1.0\n",
+ "1 nan NaN 1.0\n",
+ "2 Absent Absent 1.0\n",
+ "3 Present Present 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Num_full_term_pregnancies
**Target column:** Donor_information_number_of_full_term_pregnancies
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4 or more | \n",
+ " 4 or more | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Unknown | \n",
+ " Unknown | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 1 1 1.0\n",
+ "1 4 or more 4 or more 1.0\n",
+ "2 nan NaN 1.0\n",
+ "3 2 2 1.0\n",
+ "4 3 3 1.0\n",
+ "5 Unknown Unknown 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** CNV_class
**Target column:** CNV_status
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CNV_LOW | \n",
+ " CNV_L | \n",
+ " 0.631 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " CNV_HIGH | \n",
+ " CNV_H | \n",
+ " 0.623 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.000\n",
+ "1 CNV_LOW CNV_L 0.631\n",
+ "2 CNV_HIGH CNV_H 0.623"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** MSI_status
**Target column:** MSI_status
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " MSI-H | \n",
+ " MSI-H | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " MSS | \n",
+ " MSS | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 MSI-H MSI-H 1.0\n",
+ "1 MSS MSS 1.0\n",
+ "2 nan NaN 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** POLE_subtype
**Target column:** POLE
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " No | \n",
+ " No | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 No No 1.0\n",
+ "1 Yes Yes 1.0\n",
+ "2 nan NaN 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** JAK1_MS_INDEL
**Target column:** MSI_status
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " MS_indel | \n",
+ " MSI-H | \n",
+ " 0.352 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " WT | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.000\n",
+ "1 MS_indel MSI-H 0.352\n",
+ "2 WT None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** JAK1_Mutation
**Target column:** BMI
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Nonsense_Mutation | \n",
+ " NaN | \n",
+ " 0.31 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " WT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Missense_Mutation | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Frame_Shift_Ins_Nonsense_Mutation | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Frame_Shift_Del_Nonsense_Mutation | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Frame_Shift_Del | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Frame_Shift_Del_Frame_Shift_Ins | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.00\n",
+ "1 Nonsense_Mutation NaN 0.31\n",
+ "2 WT NaN NaN\n",
+ "3 Missense_Mutation NaN NaN\n",
+ "4 Frame_Shift_Ins_Nonsense_Mutation NaN NaN\n",
+ "5 Frame_Shift_Del_Nonsense_Mutation NaN NaN\n",
+ "6 Frame_Shift_Del NaN NaN\n",
+ "7 Frame_Shift_Del_Frame_Shift_Ins NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Genomics_subtype
**Target column:** Genomic_subtype
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " MSI-H | \n",
+ " MSI-H | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " POLE | \n",
+ " POLE | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " CNV_low | \n",
+ " CNV_L | \n",
+ " 0.663 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " CNV_high | \n",
+ " CNV_H | \n",
+ " 0.632 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 MSI-H MSI-H 1.000\n",
+ "1 nan NaN 1.000\n",
+ "2 POLE POLE 1.000\n",
+ "3 CNV_low CNV_L 0.663\n",
+ "4 CNV_high CNV_H 0.632"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** WXS_normal_sample_type
**Target column:** Batch
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Blood_normal | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Blood_normal NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** WXS_tumor_sample_type
**Target column:** Tumor_site
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Tumor | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Tumor NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** WGS_normal_sample_type
**Target column:** Mutation_signature_SBS7a
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Blood_normal | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Blood_normal NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** WGS_tumor_sample_type
**Target column:** Tumor_site
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Tumor | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Tumor NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** RNAseq_R1_sample_type
**Target column:** ARID1A
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Adjacent_normal | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Tumor | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Adjacent_normal NaN NaN\n",
+ "2 Tumor NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** RNAseq_R2_sample_type
**Target column:** ARID1A
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Adjacent_normal | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Tumor | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Adjacent_normal NaN NaN\n",
+ "2 Tumor NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** miRNAseq_sample_type
**Target column:** Mutation_signature_SBS7a
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Adjacent_normal | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Tumor | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 Adjacent_normal NaN NaN\n",
+ "2 Tumor NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Methylation_available
**Target column:** Mutation_signature_SBS42
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " YES | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.0\n",
+ "1 YES NaN NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "
**Source column:** Methylation_quality
**Target column:** MSI_status
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " NaN | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " PASS | \n",
+ " MSS | \n",
+ " 0.414 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Failed | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan NaN 1.000\n",
+ "1 PASS MSS 0.414\n",
+ "2 Failed None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Matches values from each pair of source-target columns\n",
+ "value_matches = bdi.match_values(df_source, df_target, schema_mapping.head(250), method=\"tfidf\")\n",
+ "\n",
+ "# Print value matches\n",
+ "for match in value_matches:\n",
+ " display(\n",
+ " Markdown(\n",
+ " f\"
**Source column:** {match.attrs['source']}
\"\n",
+ " f\"**Target column:** {match.attrs['target']}
\"\n",
+ " )\n",
+ " )\n",
+ " display(match)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['b4', 'b3', 'b1', 'b2', nan]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list(df_target[\"Batch\"].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Idx',\n",
+ " 'Case_id',\n",
+ " 'Case_excluded',\n",
+ " 'Batch',\n",
+ " 'Plex',\n",
+ " 'ReporterName',\n",
+ " 'Aliquot_ID',\n",
+ " 'Group',\n",
+ " 'Discovery_study',\n",
+ " 'Age',\n",
+ " 'Sex',\n",
+ " 'Histologic_Type',\n",
+ " 'Histologic_grade',\n",
+ " 'Tumor_size_cm',\n",
+ " 'Height_at_time_of_surgery_cm',\n",
+ " 'Weight_at_time_of_surgery_kg',\n",
+ " 'BMI',\n",
+ " 'Myometrial_invasion',\n",
+ " 'Myometrial_invasion_present_specify',\n",
+ " 'AJCC_tnm_cancer_staging_edition_used',\n",
+ " 'Pathologic_staging_primary_tumor_pt',\n",
+ " 'Pathologic_staging_regional_lymph_nodes_pn',\n",
+ " 'Number_of_pelvic_lymph_nodes_examined',\n",
+ " 'Tumor_stage_pathological',\n",
+ " 'Race',\n",
+ " 'CNV_ratio',\n",
+ " 'CNV_status',\n",
+ " 'POLE',\n",
+ " 'MSIsensor_ratio',\n",
+ " 'MSI_status',\n",
+ " 'Genomic_subtype',\n",
+ " 'Mutation_load',\n",
+ " 'TP53',\n",
+ " 'PTEN',\n",
+ " 'CTNNB1',\n",
+ " 'ARID1A',\n",
+ " 'PIK3CA',\n",
+ " 'xCell_Myeloid_dendritic_cell_activated',\n",
+ " 'xCell_B_cell',\n",
+ " 'xCell_T_cell_CD4+_memory',\n",
+ " 'xCell_T_cell_CD4+_naive',\n",
+ " 'xCell_T_cell_CD4+_(non-regulatory)',\n",
+ " 'xCell_T_cell_CD4+_central_memory',\n",
+ " 'xCell_T_cell_CD4+_effector_memory',\n",
+ " 'xCell_T_cell_CD8+_naive',\n",
+ " 'xCell_T_cell_CD8+',\n",
+ " 'xCell_T_cell_CD8+_central_memory',\n",
+ " 'xCell_T_cell_CD8+_effector_memory',\n",
+ " 'xCell_Class-switched_memory_B_cell',\n",
+ " 'xCell_Common_lymphoid_progenitor',\n",
+ " 'xCell_Common_myeloid_progenitor',\n",
+ " 'xCell_Myeloid_dendritic_cell',\n",
+ " 'xCell_Endothelial_cell',\n",
+ " 'xCell_Eosinophil',\n",
+ " 'xCell_Cancer_associated_fibroblast',\n",
+ " 'xCell_Granulocyte-monocyte_progenitor',\n",
+ " 'xCell_Hematopoietic_stem_cell',\n",
+ " 'xCell_Macrophage',\n",
+ " 'xCell_Macrophage_M1',\n",
+ " 'xCell_Macrophage_M2',\n",
+ " 'xCell_Mast_cell',\n",
+ " 'xCell_B_cell_memory',\n",
+ " 'xCell_Monocyte',\n",
+ " 'xCell_B_cell_naive',\n",
+ " 'xCell_Neutrophil',\n",
+ " 'xCell_NK_cell',\n",
+ " 'xCell_T_cell_NK',\n",
+ " 'xCell_Plasmacytoid_dendritic_cell',\n",
+ " 'xCell_B_cell_plasma',\n",
+ " 'xCell_T_cell_gamma_delta',\n",
+ " 'xCell_T_cell_CD4+_Th1',\n",
+ " 'xCell_T_cell_CD4+_Th2',\n",
+ " 'xCell_T_cell_regulatory_(Tregs)',\n",
+ " 'xCell_immune_score',\n",
+ " 'xCell_stroma_score',\n",
+ " 'xCell_microenvironment_score',\n",
+ " 'Cibersort_B_cell_naive',\n",
+ " 'Cibersort_B_cell_memory',\n",
+ " 'Cibersort_B_cell_plasma',\n",
+ " 'Cibersort_T_cell_CD8+',\n",
+ " 'Cibersort_T_cell_CD4+_naive',\n",
+ " 'Cibersort_T_cell_CD4+_memory_resting',\n",
+ " 'Cibersort_T_cell_CD4+_memory_activated',\n",
+ " 'Cibersort_T_cell_follicular_helper',\n",
+ " 'Cibersort_T_cell_regulatory_(Tregs)',\n",
+ " 'Cibersort_T_cell_gamma_delta',\n",
+ " 'Cibersort_NK_cell_resting',\n",
+ " 'Cibersort_NK_cell_activated',\n",
+ " 'Cibersort_Monocyte',\n",
+ " 'Cibersort_Macrophage_M0',\n",
+ " 'Cibersort_Macrophage_M1',\n",
+ " 'Cibersort_Macrophage_M2',\n",
+ " 'Cibersort_Myeloid_dendritic_cell_resting',\n",
+ " 'Cibersort_Myeloid_dendritic_cell_activated',\n",
+ " 'Cibersort_Mast_cell_activated',\n",
+ " 'Cibersort_Mast_cell_resting',\n",
+ " 'Cibersort_Eosinophil',\n",
+ " 'Cibersort_Neutrophil',\n",
+ " 'Mutation_signature_SBS1',\n",
+ " 'Mutation_signature_SBS5',\n",
+ " 'Mutation_signature_SBS6',\n",
+ " 'Mutation_signature_SBS7a',\n",
+ " 'Mutation_signature_SBS10a',\n",
+ " 'Mutation_signature_SBS10b',\n",
+ " 'Mutation_signature_SBS15',\n",
+ " 'Mutation_signature_SBS20',\n",
+ " 'Mutation_signature_SBS21',\n",
+ " 'Mutation_signature_SBS42',\n",
+ " 'Mutation_signature_SBS54',\n",
+ " 'ABSOLUTE_tumor_purity',\n",
+ " 'Diabetes',\n",
+ " 'Metformin_treatment',\n",
+ " 'Progeny_Androgen',\n",
+ " 'Progeny_EGFR',\n",
+ " 'Progeny_Estrogen',\n",
+ " 'Progeny_Hypoxia',\n",
+ " 'Progeny_JAK.STAT',\n",
+ " 'Progeny_MAPK',\n",
+ " 'Progeny_NFkB',\n",
+ " 'Progeny_p53',\n",
+ " 'Progeny_PI3K',\n",
+ " 'Progeny_TGFb',\n",
+ " 'Progeny_TNFa',\n",
+ " 'Progeny_Trail',\n",
+ " 'Progeny_VEGF',\n",
+ " 'Progeny_WNT',\n",
+ " 'Estimate_StromalScore',\n",
+ " 'Estimate_ImmuneScore',\n",
+ " 'Estimate_ESTIMATEScore',\n",
+ " 'Peritoneal_ascitic_fluid',\n",
+ " 'Tumor_necrosis',\n",
+ " 'Margin_status',\n",
+ " 'Ethnicity',\n",
+ " 'Ethnicity_race_ancestry_identified',\n",
+ " 'Participant_country',\n",
+ " 'Tumor_site',\n",
+ " 'Tumor_site_other',\n",
+ " 'Tumor_focality',\n",
+ " 'Number_of_pelvic_lymph_nodes_positive_for_tumor_by_ihc_staining_only',\n",
+ " 'Number_of_pelvic_lymph_nodes_positive_for_tumor_by_he',\n",
+ " 'Number_of_para-aortic_lymph_nodes_examined',\n",
+ " 'Number_of_para-aortic_lymph_nodes_positive_for_tumor_by_ihc_staining_only',\n",
+ " 'Number_of_para-aortic_lymph_nodes_positive_for_tumor_by_he',\n",
+ " 'Number_of_other_lymph_nodes_examined',\n",
+ " 'Number_of_other_lymph_nodes_positive_for_tumor_by_ihc_staining_only',\n",
+ " 'Number_of_other_lymph_nodes_positive_for_tumor_by_he',\n",
+ " 'Clinical_staging_distant_metastasis_cm',\n",
+ " 'Residual_tumor',\n",
+ " 'Ancillary_studies_estrogen_receptor',\n",
+ " 'Ancillary_studies_progesterone_receptor',\n",
+ " 'Ancillary_studies_mlh1',\n",
+ " 'Ancillary_studies_msh2',\n",
+ " 'Ancillary_studies_msh6',\n",
+ " 'Ancillary_studies_pms2',\n",
+ " 'Ancillary_studies_p53',\n",
+ " 'Ancillary_studies_other_immunohistochemistry_performed',\n",
+ " 'Ancillary_studies_mlh1_promoter_hypermethylation',\n",
+ " 'Ancillary_studies_other_testing_performed',\n",
+ " 'Donor_information_menopause_status',\n",
+ " 'Donor_information_has_the_patient_ever_taken_menopausal_hormone_therapy',\n",
+ " 'Donor_information_number_of_full_term_pregnancies',\n",
+ " 'Ancillary_studies_other_immunohistochemistry_type_and_result',\n",
+ " 'baseline_ancillary_studies_other_testing_type_and_result',\n",
+ " 'History_of_cancer',\n",
+ " 'Alcohol_consumption',\n",
+ " 'Tobacco_smoking_history',\n",
+ " 'Age_at_which_the_participant_started_smoking',\n",
+ " 'Age_at_which_the_participant_stopped_smoking',\n",
+ " 'On_the_days_participant_smoked_how_many_cigarettes_did_he_she_usually_smoke',\n",
+ " 'Number_of_pack_years_smoked',\n",
+ " 'Was_the_participant_exposed_to_secondhand_smoke',\n",
+ " 'Personal_medical_history_history_source',\n",
+ " 'medical_history_current_medications',\n",
+ " 'Current_medications_history_source',\n",
+ " 'Current_medications_medication_name_vitamins_supplements',\n",
+ " 'Cancer_history_cancer_type',\n",
+ " 'Cancer_history_history_source',\n",
+ " 'Cancer_history_history_of_any_treatment',\n",
+ " 'Cancer_history_medical_record_documentation_of_this_history_of_cancer_and_treatment',\n",
+ " 'Procurement_blood_collection_minimum_required_blood_collected',\n",
+ " 'Procurement_blood_collection_number_of_blood_tubes_collected',\n",
+ " 'Follow-up_follow_up_period',\n",
+ " 'Follow-up_is_this_patient_lost_to_follow-up',\n",
+ " 'Follow-up_vital_status_at_date_of_last_contact',\n",
+ " 'Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_last_contact',\n",
+ " 'Follow-up_adjuvant_post-operative_radiation_therapy',\n",
+ " 'Follow-up_adjuvant_post-operative_pharmaceutical_therapy',\n",
+ " 'Follow-up_adjuvant_post-operative_immunological_therapy',\n",
+ " 'Follow-up_tumor_status_at_date_of_last_contact_or_death',\n",
+ " 'Follow-up_has_the_patient_ever_taken_menopausal_hormone_therapy',\n",
+ " 'Follow-up_has_the_patient_ever_taken_oral_contraceptives',\n",
+ " 'Follow-up_has_the_patient_ever_taken_tamoxifen',\n",
+ " 'Follow-up_hypertension',\n",
+ " 'Follow-up_has_the_patient_ever_been_diagnosed_with_diabetes_by_a_physician',\n",
+ " 'Follow-up_number_of_full_term_pregnancies',\n",
+ " 'Follow-up_has_the_patient_had_colorectal_cancer',\n",
+ " 'Follow-up_measure_of_success_of_outcome_at_the_completion_of_initial_first_course_treatment',\n",
+ " 'Follow-up_measure_of_success_of_outcome_at_completion_of_this_follow-up_form',\n",
+ " 'Follow-up_measure_of_success_of_outcome_at_date_of_last_contact_or_death',\n",
+ " 'follow-up_new_tumor_after_initial_treatment',\n",
+ " 'Follow-up_type_of_new_tumor',\n",
+ " 'Follow-up_site_of_new_tumor',\n",
+ " 'Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_new_tumor_after_initial_treatment',\n",
+ " 'Follow-up_additional_surgery_for_new_tumor',\n",
+ " 'Follow-up_additional_treatment_radiation_therapy_for_new_tumor',\n",
+ " 'Follow-up_additional_treatment_pharmaceutical_therapy_for_new_tumor',\n",
+ " 'Follow-up_additional_treatment_immuno_for_new_tumor',\n",
+ " 'Follow-up_days_from_date_of_collection_to_date_of_last_contact',\n",
+ " 'Follow-up_cause_of_death',\n",
+ " 'Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_death',\n",
+ " 'Follow-up_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor',\n",
+ " 'Follow-up_procedure_type_of_new_tumor',\n",
+ " 'Follow-up_residual_tumor_after_surgery_for_new_tumor']"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list(df_target.columns)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Generating a harmonized table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " participant_country | \n",
+ " sex | \n",
+ " tumor_stage_pathological | \n",
+ " pathologic_staging_regional_lymph_nodes_pn | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pN0 | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IV | \n",
+ " pNX | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pN0 | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pNX | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " None | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pNX | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " None | \n",
+ " Female | \n",
+ " Stage III | \n",
+ " pN0 | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage III | \n",
+ " pN0 | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " None | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pN0 | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " participant_country sex tumor_stage_pathological \\\n",
+ "0 United States Female Stage IA \n",
+ "1 United States Female Stage IV \n",
+ "2 United States Female Stage IA \n",
+ "3 NaN NaN NaN \n",
+ "4 United States Female Stage IA \n",
+ ".. ... ... ... \n",
+ "99 None Female Stage IA \n",
+ "100 None Female Stage III \n",
+ "101 United States Female Stage III \n",
+ "102 None Female Stage IA \n",
+ "103 None NaN NaN \n",
+ "\n",
+ " pathologic_staging_regional_lymph_nodes_pn tumor_focality \n",
+ "0 pN0 Unifocal \n",
+ "1 pNX Unifocal \n",
+ "2 pN0 Unifocal \n",
+ "3 NaN NaN \n",
+ "4 pNX Unifocal \n",
+ ".. ... ... \n",
+ "99 pNX Unifocal \n",
+ "100 pN0 Unifocal \n",
+ "101 pN0 Unifocal \n",
+ "102 pN0 Unifocal \n",
+ "103 NaN NaN \n",
+ "\n",
+ "[104 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_mapped = bdi.materialize_mapping(df_source, value_matches)\n",
+ "df_mapped"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " Gender | \n",
+ " FIGO_stage | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " tumor_Stage-Pathological | \n",
+ " Tumor_Focality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Female | \n",
+ " IA | \n",
+ " pN0 | \n",
+ " Stage I | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " Female | \n",
+ " IA | \n",
+ " pNX | \n",
+ " Stage IV | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " Female | \n",
+ " IA | \n",
+ " pN0 | \n",
+ " Stage I | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " Female | \n",
+ " IA | \n",
+ " pNX | \n",
+ " Stage I | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Ukraine | \n",
+ " Female | \n",
+ " IA | \n",
+ " pNX | \n",
+ " Stage I | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Ukraine | \n",
+ " Female | \n",
+ " II | \n",
+ " pN0 | \n",
+ " Stage II | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " United States | \n",
+ " Female | \n",
+ " II | \n",
+ " pN0 | \n",
+ " Stage II | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Ukraine | \n",
+ " Female | \n",
+ " IA | \n",
+ " pN0 | \n",
+ " Stage I | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " Ukraine | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Country Gender FIGO_stage Path_Stage_Reg_Lymph_Nodes-pN \\\n",
+ "0 United States Female IA pN0 \n",
+ "1 United States Female IA pNX \n",
+ "2 United States Female IA pN0 \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 United States Female IA pNX \n",
+ ".. ... ... ... ... \n",
+ "99 Ukraine Female IA pNX \n",
+ "100 Ukraine Female II pN0 \n",
+ "101 United States Female II pN0 \n",
+ "102 Ukraine Female IA pN0 \n",
+ "103 Ukraine NaN NaN NaN \n",
+ "\n",
+ " tumor_Stage-Pathological Tumor_Focality \n",
+ "0 Stage I Unifocal \n",
+ "1 Stage IV Unifocal \n",
+ "2 Stage I Unifocal \n",
+ "3 NaN NaN \n",
+ "4 Stage I Unifocal \n",
+ ".. ... ... \n",
+ "99 Stage I Unifocal \n",
+ "100 Stage II Unifocal \n",
+ "101 Stage II Unifocal \n",
+ "102 Stage I Unifocal \n",
+ "103 NaN NaN \n",
+ "\n",
+ "[104 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "source_column_names = list(map(lambda m: m.attrs['source'], value_matches))\n",
+ "target_column_names = list(map(lambda m: m.attrs['target'], value_matches))\n",
+ "df_source[source_column_names]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " participant_country | \n",
+ " sex | \n",
+ " tumor_stage_pathological | \n",
+ " pathologic_staging_regional_lymph_nodes_pn | \n",
+ " tumor_stage_pathological | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pN0 | \n",
+ " Stage IA | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IV | \n",
+ " pNX | \n",
+ " Stage IV | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pN0 | \n",
+ " Stage IA | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " Female | \n",
+ " Stage IA | \n",
+ " pNX | \n",
+ " Stage IA | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 135 | \n",
+ " Poland | \n",
+ " Male | \n",
+ " Stage III | \n",
+ " pN2 | \n",
+ " Stage III | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " China | \n",
+ " Female | \n",
+ " Stage III | \n",
+ " pN2 | \n",
+ " Stage III | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 137 | \n",
+ " China | \n",
+ " Male | \n",
+ " Stage III | \n",
+ " pN2 | \n",
+ " Stage III | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 138 | \n",
+ " Poland | \n",
+ " Female | \n",
+ " Stage III | \n",
+ " pN2 | \n",
+ " Stage III | \n",
+ " Multifocal | \n",
+ "
\n",
+ " \n",
+ " 139 | \n",
+ " Poland | \n",
+ " Female | \n",
+ " Stage III | \n",
+ " pN2 | \n",
+ " Stage III | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
244 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " participant_country sex tumor_stage_pathological \\\n",
+ "0 United States Female Stage IA \n",
+ "1 United States Female Stage IV \n",
+ "2 United States Female Stage IA \n",
+ "3 NaN NaN NaN \n",
+ "4 United States Female Stage IA \n",
+ ".. ... ... ... \n",
+ "135 Poland Male Stage III \n",
+ "136 China Female Stage III \n",
+ "137 China Male Stage III \n",
+ "138 Poland Female Stage III \n",
+ "139 Poland Female Stage III \n",
+ "\n",
+ " pathologic_staging_regional_lymph_nodes_pn tumor_stage_pathological \\\n",
+ "0 pN0 Stage IA \n",
+ "1 pNX Stage IV \n",
+ "2 pN0 Stage IA \n",
+ "3 NaN NaN \n",
+ "4 pNX Stage IA \n",
+ ".. ... ... \n",
+ "135 pN2 Stage III \n",
+ "136 pN2 Stage III \n",
+ "137 pN2 Stage III \n",
+ "138 pN2 Stage III \n",
+ "139 pN2 Stage III \n",
+ "\n",
+ " tumor_focality \n",
+ "0 Unifocal \n",
+ "1 Unifocal \n",
+ "2 Unifocal \n",
+ "3 NaN \n",
+ "4 Unifocal \n",
+ ".. ... \n",
+ "135 Unifocal \n",
+ "136 Unifocal \n",
+ "137 Unifocal \n",
+ "138 Multifocal \n",
+ "139 Unifocal \n",
+ "\n",
+ "[244 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df_mapped[target_column_names], df_target[target_column_names]])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/demo.ipynb b/examples/demo.ipynb
index 3fc6ee7b..7e8c6948 100644
--- a/examples/demo.ipynb
+++ b/examples/demo.ipynb
@@ -70,133 +70,373 @@
" \n",
" \n",
" | \n",
- " Country | \n",
- " Gender | \n",
- " FIGO_stage | \n",
- " Path_Stage_Reg_Lymph_Nodes-pN | \n",
- " tumor_Stage-Pathological | \n",
- " Tumor_Focality | \n",
+ " case_id | \n",
+ " age | \n",
+ " gender | \n",
+ " country | \n",
+ " smoking_history | \n",
+ " smoke_age_start | \n",
+ " smoke_age_stop | \n",
+ " num_smoke_per_day | \n",
+ " num_pack_years_sm | \n",
+ " smoking_second_hand | \n",
+ " ... | \n",
+ " tumor_pathology_review | \n",
+ " ESTIMATE_stromal_score | \n",
+ " ESTIMATE_immune_score | \n",
+ " CD3_IHC_count | \n",
+ " stemness_score | \n",
+ " mutation_count | \n",
+ " neoAntigen_count | \n",
+ " chr_instability_idx | \n",
+ " integrated_subtype | \n",
+ " transcriptomic_subtype | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " United States | \n",
- " Female | \n",
- " IA | \n",
- " pN0 | \n",
- " Stage I | \n",
- " Unifocal | \n",
+ " C3L-00977 | \n",
+ " 56 | \n",
+ " Male | \n",
+ " Russia | \n",
+ " Current reformed smoker, years unknown | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " NaN | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 80% | \n",
+ " 6825.995755 | \n",
+ " 7989.115925 | \n",
+ " 1.0 | \n",
+ " 0.953243 | \n",
+ " 106 | \n",
+ " 0 | \n",
+ " 2.003654 | \n",
+ " Basal | \n",
+ " Mesenchymal | \n",
"
\n",
" \n",
" 1 | \n",
- " United States | \n",
- " Female | \n",
- " IA | \n",
- " pNX | \n",
- " Stage IV | \n",
- " Unifocal | \n",
+ " C3L-00987 | \n",
+ " 61 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 18 | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " 43.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 90%;SCC 90%;SCC 80%;SCC 70% | \n",
+ " 5999.793467 | \n",
+ " 4772.409716 | \n",
+ " 0.0 | \n",
+ " 0.825330 | \n",
+ " 83 | \n",
+ " 0 | \n",
+ " 5.205612 | \n",
+ " CIN | \n",
+ " Classical | \n",
"
\n",
" \n",
" 2 | \n",
- " United States | \n",
- " Female | \n",
- " IA | \n",
- " pN0 | \n",
- " Stage I | \n",
- " Unifocal | \n",
+ " C3L-00994 | \n",
+ " 50 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker within past 15 years | \n",
+ " 16 | \n",
+ " 50 | \n",
+ " 6 | \n",
+ " 10.2 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 40%;SCC 70%;SCC 40%;SCC 75% | \n",
+ " 8924.036564 | \n",
+ " 8176.233903 | \n",
+ " 5.0 | \n",
+ " 0.664581 | \n",
+ " 67 | \n",
+ " 0 | \n",
+ " 1.684475 | \n",
+ " Immune | \n",
+ " Mesenchymal | \n",
"
\n",
" \n",
" 3 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " C3L-00995 | \n",
+ " 56 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker, more than 15 years | \n",
+ " 25 | \n",
+ " 56 | \n",
+ " 20 | \n",
+ " 31.0 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 90%;SCC 80%;SCC 40%;SCC 70% | \n",
+ " 8723.429667 | \n",
+ " 8342.246345 | \n",
+ " 70.0 | \n",
+ " 0.539918 | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " 1.340483 | \n",
+ " Immune | \n",
+ " Mesenchymal | \n",
"
\n",
" \n",
" 4 | \n",
- " United States | \n",
- " Female | \n",
- " IA | \n",
- " pNX | \n",
- " Stage I | \n",
- " Unifocal | \n",
+ " C3L-00997 | \n",
+ " 47 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker, more than 15 years | \n",
+ " 12 | \n",
+ " 27 | \n",
+ " 20 | \n",
+ " 15.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 70%;SCC 70% | \n",
+ " 7025.911695 | \n",
+ " 7445.251991 | \n",
+ " 60.0 | \n",
+ " 0.843765 | \n",
+ " 129 | \n",
+ " 0 | \n",
+ " 3.906370 | \n",
+ " CIN | \n",
+ " Classical | \n",
"
\n",
" \n",
" 5 | \n",
- " United States | \n",
- " Female | \n",
- " IA | \n",
- " pNX | \n",
- " Stage I | \n",
- " Unifocal | \n",
+ " C3L-00999 | \n",
+ " 56 | \n",
+ " Male | \n",
+ " Russia | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 20 | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " 36.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 80%;SCC 70% | \n",
+ " 8510.704551 | \n",
+ " 8210.549555 | \n",
+ " 30.0 | \n",
+ " 0.548977 | \n",
+ " 159 | \n",
+ " 0 | \n",
+ " 1.148834 | \n",
+ " Immune | \n",
+ " Atypical | \n",
"
\n",
" \n",
" 6 | \n",
- " United States | \n",
- " Female | \n",
- " IA | \n",
- " pNX | \n",
- " Stage I | \n",
- " Unifocal | \n",
+ " C3L-01138 | \n",
+ " 62 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 8 | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " 54.0 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 70% | \n",
+ " 6283.423855 | \n",
+ " 6407.893478 | \n",
+ " 10.0 | \n",
+ " 0.890790 | \n",
+ " 187 | \n",
+ " 1 | \n",
+ " 3.924982 | \n",
+ " Basal | \n",
+ " Classical | \n",
"
\n",
" \n",
" 7 | \n",
- " Other_specify | \n",
- " Female | \n",
- " IA | \n",
- " pNX | \n",
- " Stage I | \n",
- " Unifocal | \n",
+ " C3L-01237 | \n",
+ " 57 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker, more than 15 years | \n",
+ " 17 | \n",
+ " 20 | \n",
+ " 40 | \n",
+ " 6.0 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 80% | \n",
+ " 7175.667725 | \n",
+ " 5720.287055 | \n",
+ " 30.0 | \n",
+ " 0.738328 | \n",
+ " 141 | \n",
+ " 1 | \n",
+ " 4.286490 | \n",
+ " CIN | \n",
+ " Classical | \n",
"
\n",
" \n",
" 8 | \n",
- " United States | \n",
- " Female | \n",
- " IIIA | \n",
- " pNX | \n",
- " Stage III | \n",
- " Unifocal | \n",
+ " C3L-02617 | \n",
+ " 64 | \n",
+ " Male | \n",
+ " Bulgaria | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 20 | \n",
+ " NaN | \n",
+ " 30 | \n",
+ " 66.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 90%;SCC 80%;SCC 80%;SCC 80%;SCC 80% | \n",
+ " 6346.815584 | \n",
+ " 6958.573390 | \n",
+ " NaN | \n",
+ " 0.906466 | \n",
+ " 470 | \n",
+ " 1 | \n",
+ " 4.744818 | \n",
+ " CIN | \n",
+ " Classical | \n",
"
\n",
" \n",
" 9 | \n",
- " United States | \n",
- " Female | \n",
- " IA | \n",
- " pN0 | \n",
- " Stage I | \n",
- " Unifocal | \n",
+ " C3L-02621 | \n",
+ " 68 | \n",
+ " Male | \n",
+ " Bulgaria | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 38 | \n",
+ " NaN | \n",
+ " 30 | \n",
+ " 45.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 70%;SCC 70%;SCC 70%;SCC 70%;SCC 65% | \n",
+ " 6806.384264 | \n",
+ " 8039.787184 | \n",
+ " NaN | \n",
+ " 0.754950 | \n",
+ " 358 | \n",
+ " 0 | \n",
+ " 1.774521 | \n",
+ " CIN | \n",
+ " Atypical | \n",
"
\n",
" \n",
"\n",
+ "10 rows × 37 columns
\n",
""
],
"text/plain": [
- " Country Gender FIGO_stage Path_Stage_Reg_Lymph_Nodes-pN \\\n",
- "0 United States Female IA pN0 \n",
- "1 United States Female IA pNX \n",
- "2 United States Female IA pN0 \n",
- "3 NaN NaN NaN NaN \n",
- "4 United States Female IA pNX \n",
- "5 United States Female IA pNX \n",
- "6 United States Female IA pNX \n",
- "7 Other_specify Female IA pNX \n",
- "8 United States Female IIIA pNX \n",
- "9 United States Female IA pN0 \n",
+ " case_id age gender country \\\n",
+ "0 C3L-00977 56 Male Russia \n",
+ "1 C3L-00987 61 Male Ukraine \n",
+ "2 C3L-00994 50 Male Ukraine \n",
+ "3 C3L-00995 56 Male Ukraine \n",
+ "4 C3L-00997 47 Male Ukraine \n",
+ "5 C3L-00999 56 Male Russia \n",
+ "6 C3L-01138 62 Male Ukraine \n",
+ "7 C3L-01237 57 Male Ukraine \n",
+ "8 C3L-02617 64 Male Bulgaria \n",
+ "9 C3L-02621 68 Male Bulgaria \n",
"\n",
- " tumor_Stage-Pathological Tumor_Focality \n",
- "0 Stage I Unifocal \n",
- "1 Stage IV Unifocal \n",
- "2 Stage I Unifocal \n",
- "3 NaN NaN \n",
- "4 Stage I Unifocal \n",
- "5 Stage I Unifocal \n",
- "6 Stage I Unifocal \n",
- "7 Stage I Unifocal \n",
- "8 Stage III Unifocal \n",
- "9 Stage I Unifocal "
+ " smoking_history smoke_age_start \\\n",
+ "0 Current reformed smoker, years unknown NaN \n",
+ "1 Current smoker: Includes daily and non-daily s... 18 \n",
+ "2 Current reformed smoker within past 15 years 16 \n",
+ "3 Current reformed smoker, more than 15 years 25 \n",
+ "4 Current reformed smoker, more than 15 years 12 \n",
+ "5 Current smoker: Includes daily and non-daily s... 20 \n",
+ "6 Current smoker: Includes daily and non-daily s... 8 \n",
+ "7 Current reformed smoker, more than 15 years 17 \n",
+ "8 Current smoker: Includes daily and non-daily s... 20 \n",
+ "9 Current smoker: Includes daily and non-daily s... 38 \n",
+ "\n",
+ " smoke_age_stop num_smoke_per_day num_pack_years_sm \\\n",
+ "0 NaN 20 NaN \n",
+ "1 NaN 20 43.0 \n",
+ "2 50 6 10.2 \n",
+ "3 56 20 31.0 \n",
+ "4 27 20 15.0 \n",
+ "5 NaN 20 36.0 \n",
+ "6 NaN 20 54.0 \n",
+ "7 20 40 6.0 \n",
+ "8 NaN 30 66.0 \n",
+ "9 NaN 30 45.0 \n",
+ "\n",
+ " smoking_second_hand ... \\\n",
+ "0 Yes ... \n",
+ "1 Yes ... \n",
+ "2 Exposure to secondhand smoke history not avail... ... \n",
+ "3 Exposure to secondhand smoke history not avail... ... \n",
+ "4 Yes ... \n",
+ "5 Yes ... \n",
+ "6 Exposure to secondhand smoke history not avail... ... \n",
+ "7 Exposure to secondhand smoke history not avail... ... \n",
+ "8 Yes ... \n",
+ "9 Yes ... \n",
+ "\n",
+ " tumor_pathology_review ESTIMATE_stromal_score \\\n",
+ "0 SCC 80% 6825.995755 \n",
+ "1 SCC 90%;SCC 90%;SCC 80%;SCC 70% 5999.793467 \n",
+ "2 SCC 40%;SCC 70%;SCC 40%;SCC 75% 8924.036564 \n",
+ "3 SCC 90%;SCC 80%;SCC 40%;SCC 70% 8723.429667 \n",
+ "4 SCC 70%;SCC 70% 7025.911695 \n",
+ "5 SCC 80%;SCC 70% 8510.704551 \n",
+ "6 SCC 70% 6283.423855 \n",
+ "7 SCC 80% 7175.667725 \n",
+ "8 SCC 90%;SCC 80%;SCC 80%;SCC 80%;SCC 80% 6346.815584 \n",
+ "9 SCC 70%;SCC 70%;SCC 70%;SCC 70%;SCC 65% 6806.384264 \n",
+ "\n",
+ " ESTIMATE_immune_score CD3_IHC_count stemness_score mutation_count \\\n",
+ "0 7989.115925 1.0 0.953243 106 \n",
+ "1 4772.409716 0.0 0.825330 83 \n",
+ "2 8176.233903 5.0 0.664581 67 \n",
+ "3 8342.246345 70.0 0.539918 64 \n",
+ "4 7445.251991 60.0 0.843765 129 \n",
+ "5 8210.549555 30.0 0.548977 159 \n",
+ "6 6407.893478 10.0 0.890790 187 \n",
+ "7 5720.287055 30.0 0.738328 141 \n",
+ "8 6958.573390 NaN 0.906466 470 \n",
+ "9 8039.787184 NaN 0.754950 358 \n",
+ "\n",
+ " neoAntigen_count chr_instability_idx integrated_subtype \\\n",
+ "0 0 2.003654 Basal \n",
+ "1 0 5.205612 CIN \n",
+ "2 0 1.684475 Immune \n",
+ "3 0 1.340483 Immune \n",
+ "4 0 3.906370 CIN \n",
+ "5 0 1.148834 Immune \n",
+ "6 1 3.924982 Basal \n",
+ "7 1 4.286490 CIN \n",
+ "8 1 4.744818 CIN \n",
+ "9 0 1.774521 CIN \n",
+ "\n",
+ " transcriptomic_subtype \n",
+ "0 Mesenchymal \n",
+ "1 Classical \n",
+ "2 Mesenchymal \n",
+ "3 Mesenchymal \n",
+ "4 Classical \n",
+ "5 Atypical \n",
+ "6 Classical \n",
+ "7 Classical \n",
+ "8 Classical \n",
+ "9 Atypical \n",
+ "\n",
+ "[10 rows x 37 columns]"
]
},
"execution_count": 2,
@@ -205,16 +445,16 @@
}
],
"source": [
- "df_source = pd.read_csv(\"./datasets/dou.csv\")\n",
- "column_names = [\n",
- " \"Country\",\n",
- " \"Gender\",\n",
- " \"FIGO_stage\",\n",
- " \"Path_Stage_Reg_Lymph_Nodes-pN\",\n",
- " \"tumor_Stage-Pathological\",\n",
- " \"Tumor_Focality\",\n",
- "]\n",
- "df_source = df_source[column_names]\n",
+ "df_source = pd.read_csv(\"./datasets/Huang.csv\")\n",
+ "# column_names = [\n",
+ "# \"Country\",\n",
+ "# \"Gender\",\n",
+ "# \"FIGO_stage\",\n",
+ "# \"Path_Stage_Reg_Lymph_Nodes-pN\",\n",
+ "# \"tumor_Stage-Pathological\",\n",
+ "# \"Tumor_Focality\",\n",
+ "# ]\n",
+ "# df_source = df_source[column_names]\n",
"df_source.head(10)"
]
},
@@ -467,15 +707,22 @@
"execution_count": 4,
"metadata": {},
"outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 37 columns...\n"
+ ]
+ },
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "32cb8bd8acad4941be9dda4e11a51c4f",
+ "model_id": "c53863767484415b8432756f00ca4549",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- " 0%| | 0/6 [00:00, ?it/s]"
+ " 0%| | 0/37 [00:00, ?it/s]"
]
},
"metadata": {},
@@ -485,13 +732,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Table features extracted from 6 columns\n"
+ "Extracting features from 39 columns...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f95fe59637044c6cb7bb50b3c0850405",
+ "model_id": "4f93b80f9c5746b28765395e1ca0d41a",
"version_major": 2,
"version_minor": 0
},
@@ -502,13 +749,6 @@
"metadata": {},
"output_type": "display_data"
},
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Table features extracted from 39 columns\n"
- ]
- },
{
"data": {
"text/html": [
@@ -537,46 +777,232 @@
" \n",
" \n",
" 0 | \n",
- " Country | \n",
- " participant_country | \n",
+ " case_id | \n",
+ " case_id | \n",
"
\n",
" \n",
" 1 | \n",
- " Gender | \n",
- " sex | \n",
+ " age | \n",
+ " age | \n",
"
\n",
" \n",
" 2 | \n",
- " FIGO_stage | \n",
- " tumor_stage_pathological | \n",
+ " gender | \n",
+ " sex | \n",
"
\n",
" \n",
" 3 | \n",
- " Path_Stage_Reg_Lymph_Nodes-pN | \n",
- " pathologic_staging_regional_lymph_nodes_pn | \n",
+ " country | \n",
+ " participant_country | \n",
"
\n",
" \n",
" 4 | \n",
- " tumor_Stage-Pathological | \n",
- " tumor_stage_pathological | \n",
+ " smoking_history | \n",
+ " tobacco_smoking_history | \n",
"
\n",
" \n",
" 5 | \n",
- " Tumor_Focality | \n",
+ " smoke_age_start | \n",
+ " age | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " smoke_age_stop | \n",
+ " age | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " num_smoke_per_day | \n",
+ " tobacco_smoking_history | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " num_pack_years_sm | \n",
+ " bmi | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " smoking_second_hand | \n",
+ " tobacco_smoking_history | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " smoking_inferred | \n",
+ " tobacco_smoking_history | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " alcohol_consum | \n",
+ " alcohol_consumption | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " num_yrs_alc_con | \n",
+ " Neoplastic_cellularity | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " tumor_site_original | \n",
+ " tumor_site | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " tumor_site_curated | \n",
+ " tumor_site | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
" tumor_focality | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " tumor_size_cm | \n",
+ " tumor_size_cm | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " histologic_type | \n",
+ " case_id | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " histologic_grade | \n",
+ " tumor_stage_pathological | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " tumor_necrosis | \n",
+ " tumor_necrosis | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " patho_staging_pt | \n",
+ " pathologic_staging_primary_tumor_pt | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " patho_staging_pn | \n",
+ " pathologic_staging_regional_lymph_nodes_pn | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " clinic_staging_dist_metas | \n",
+ " clinical_staging_distant_metastasis_cm | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " tumor_stage | \n",
+ " tumor_stage_pathological | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " P16 | \n",
+ " Acinar_fraction | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " HPV_inference | \n",
+ " perineural_invasion | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " NAT_pathology_review | \n",
+ " Neoplastic_cellularity | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " tumor_pathology_review | \n",
+ " Stromal_fraction | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " ESTIMATE_stromal_score | \n",
+ " Stromal_fraction | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " ESTIMATE_immune_score | \n",
+ " Stromal_fraction | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " CD3_IHC_count | \n",
+ " Acinar_fraction | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " stemness_score | \n",
+ " Stromal_fraction | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " mutation_count | \n",
+ " bmi | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " neoAntigen_count | \n",
+ " Neoplastic_cellularity | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " chr_instability_idx | \n",
+ " case_id | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " integrated_subtype | \n",
+ " bmi | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " transcriptomic_subtype | \n",
+ " bmi | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " source target\n",
- "0 Country participant_country\n",
- "1 Gender sex\n",
- "2 FIGO_stage tumor_stage_pathological\n",
- "3 Path_Stage_Reg_Lymph_Nodes-pN pathologic_staging_regional_lymph_nodes_pn\n",
- "4 tumor_Stage-Pathological tumor_stage_pathological\n",
- "5 Tumor_Focality tumor_focality"
+ " source target\n",
+ "0 case_id case_id\n",
+ "1 age age\n",
+ "2 gender sex\n",
+ "3 country participant_country\n",
+ "4 smoking_history tobacco_smoking_history\n",
+ "5 smoke_age_start age\n",
+ "6 smoke_age_stop age\n",
+ "7 num_smoke_per_day tobacco_smoking_history\n",
+ "8 num_pack_years_sm bmi\n",
+ "9 smoking_second_hand tobacco_smoking_history\n",
+ "10 smoking_inferred tobacco_smoking_history\n",
+ "11 alcohol_consum alcohol_consumption\n",
+ "12 num_yrs_alc_con Neoplastic_cellularity\n",
+ "13 tumor_site_original tumor_site\n",
+ "14 tumor_site_curated tumor_site\n",
+ "15 tumor_focality tumor_focality\n",
+ "16 tumor_size_cm tumor_size_cm\n",
+ "17 histologic_type case_id\n",
+ "18 histologic_grade tumor_stage_pathological\n",
+ "19 tumor_necrosis tumor_necrosis\n",
+ "20 patho_staging_pt pathologic_staging_primary_tumor_pt\n",
+ "21 patho_staging_pn pathologic_staging_regional_lymph_nodes_pn\n",
+ "22 clinic_staging_dist_metas clinical_staging_distant_metastasis_cm\n",
+ "23 tumor_stage tumor_stage_pathological\n",
+ "24 P16 Acinar_fraction\n",
+ "25 HPV_inference perineural_invasion\n",
+ "26 NAT_pathology_review Neoplastic_cellularity\n",
+ "27 tumor_pathology_review Stromal_fraction\n",
+ "28 ESTIMATE_stromal_score Stromal_fraction\n",
+ "29 ESTIMATE_immune_score Stromal_fraction\n",
+ "30 CD3_IHC_count Acinar_fraction\n",
+ "31 stemness_score Stromal_fraction\n",
+ "32 mutation_count bmi\n",
+ "33 neoAntigen_count Neoplastic_cellularity\n",
+ "34 chr_instability_idx case_id\n",
+ "35 integrated_subtype bmi\n",
+ "36 transcriptomic_subtype bmi"
]
},
"execution_count": 4,
@@ -1115,6 +1541,8 @@
"\n",
"# Print value matches\n",
"for match in value_matches:\n",
+ " bdi.edit_match(match)\n",
+ " bdi.view_match(match)\n",
" display(\n",
" Markdown(\n",
" f\"
**Source column:** {match.attrs['source']}
\"\n",
@@ -1686,7 +2114,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.5"
+ "version": "3.10.12"
}
},
"nbformat": 4,
diff --git a/examples/getting-started copy.ipynb b/examples/getting-started copy.ipynb
new file mode 100644
index 00000000..898d0281
--- /dev/null
+++ b/examples/getting-started copy.ipynb
@@ -0,0 +1,8483 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Getting Started"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Harmonization with `bdikit`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Data harmonization is the process of integrating and aligning data from different sources into a consistent format to ensure compatibility and interoperability across data analyses and systems. `bdikit` is a library that helps with key data harmonization steps:\n",
+ "- *Schema Mapping*: In this step, data from various sources are mapped to a unified schema or model. This involves identifying equivalent table columns and establishing relationships between disparate datasets.\n",
+ "- *Value Mapping (Data Standardization)*: This step involves converting data into a common format or structure, using consistent naming conventions, units, and coding systems to ensure uniformity.\n",
+ "\n",
+ "In this example, we describe how `bdikit` can be used to map a dataset from Dou et al. (https://pubmed.ncbi.nlm.nih.gov/37567170/) to the [GDC (Genomic Data Commons)](https://portal.gdc.cancer.gov/) standard data format.\n",
+ "\n",
+ "First, import the `bdikit` library."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import bdikit as bdi\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, we load the data using Pandas."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " Histologic_Grade_FIGO | \n",
+ " Histologic_type | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " Clin_Stage_Dist_Mets-cM | \n",
+ " Path_Stage_Dist_Mets-pM | \n",
+ " tumor_Stage-Pathological | \n",
+ " FIGO_stage | \n",
+ " BMI | \n",
+ " Age | \n",
+ " Race | \n",
+ " Ethnicity | \n",
+ " Gender | \n",
+ " Tumor_Site | \n",
+ " Tumor_Focality | \n",
+ " Tumor_Size_cm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 38.88 | \n",
+ " 64.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Anterior endometrium | \n",
+ " Unifocal | \n",
+ " 2.9 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage IV | \n",
+ " IA | \n",
+ " 39.76 | \n",
+ " 58.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Posterior endometrium | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 51.19 | \n",
+ " 50.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " No pathologic evidence of distant metastasis | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 32.69 | \n",
+ " 75.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " United States | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 20.28 | \n",
+ " 63.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 55.67 | \n",
+ " 50.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Other_specify | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 25.68 | \n",
+ " 60.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " United States | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage III | \n",
+ " IIIA | \n",
+ " 21.57 | \n",
+ " 83.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1 (FIGO I) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 34.26 | \n",
+ " 69.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 5.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Country Histologic_Grade_FIGO Histologic_type \\\n",
+ "0 United States FIGO grade 1 Endometrioid \n",
+ "1 United States FIGO grade 1 Endometrioid \n",
+ "2 United States FIGO grade 2 Endometrioid \n",
+ "3 NaN NaN Carcinosarcoma \n",
+ "4 United States FIGO grade 2 Endometrioid \n",
+ "5 United States NaN Serous \n",
+ "6 United States FIGO grade 1 Endometrioid \n",
+ "7 Other_specify FIGO grade 2 Endometrioid \n",
+ "8 United States NaN Serous \n",
+ "9 United States FIGO grade 1 Endometrioid \n",
+ "\n",
+ " Path_Stage_Primary_Tumor-pT Path_Stage_Reg_Lymph_Nodes-pN \\\n",
+ "0 pT1a (FIGO IA) pN0 \n",
+ "1 pT1a (FIGO IA) pNX \n",
+ "2 pT1a (FIGO IA) pN0 \n",
+ "3 NaN NaN \n",
+ "4 pT1a (FIGO IA) pNX \n",
+ "5 pT1a (FIGO IA) pNX \n",
+ "6 pT1a (FIGO IA) pNX \n",
+ "7 pT1a (FIGO IA) pNX \n",
+ "8 pT3a (FIGO IIIA) pNX \n",
+ "9 pT1 (FIGO I) pN0 \n",
+ "\n",
+ " Clin_Stage_Dist_Mets-cM Path_Stage_Dist_Mets-pM \\\n",
+ "0 cM0 Staging Incomplete \n",
+ "1 cM0 Staging Incomplete \n",
+ "2 cM0 Staging Incomplete \n",
+ "3 NaN NaN \n",
+ "4 cM0 No pathologic evidence of distant metastasis \n",
+ "5 cM0 Staging Incomplete \n",
+ "6 cM0 Staging Incomplete \n",
+ "7 cM0 Staging Incomplete \n",
+ "8 cM0 Staging Incomplete \n",
+ "9 cM0 Staging Incomplete \n",
+ "\n",
+ " tumor_Stage-Pathological FIGO_stage BMI Age Race \\\n",
+ "0 Stage I IA 38.88 64.0 White \n",
+ "1 Stage IV IA 39.76 58.0 White \n",
+ "2 Stage I IA 51.19 50.0 White \n",
+ "3 NaN NaN NaN NaN NaN \n",
+ "4 Stage I IA 32.69 75.0 White \n",
+ "5 Stage I IA 20.28 63.0 White \n",
+ "6 Stage I IA 55.67 50.0 White \n",
+ "7 Stage I IA 25.68 60.0 White \n",
+ "8 Stage III IIIA 21.57 83.0 White \n",
+ "9 Stage I IA 34.26 69.0 White \n",
+ "\n",
+ " Ethnicity Gender Tumor_Site Tumor_Focality \\\n",
+ "0 Not-Hispanic or Latino Female Anterior endometrium Unifocal \n",
+ "1 Not-Hispanic or Latino Female Posterior endometrium Unifocal \n",
+ "2 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "5 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "6 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "7 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "8 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "9 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "\n",
+ " Tumor_Size_cm \n",
+ "0 2.9 \n",
+ "1 3.5 \n",
+ "2 4.5 \n",
+ "3 NaN \n",
+ "4 3.5 \n",
+ "5 6.0 \n",
+ "6 4.5 \n",
+ "7 5.0 \n",
+ "8 4.0 \n",
+ "9 5.2 "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset = pd.read_csv(\"./datasets/dou.csv\")\n",
+ "\n",
+ "# columns = [\n",
+ "# \"Country\",\n",
+ "# \"Path_Stage_Primary_Tumor-pT\",\n",
+ "# \"FIGO_stage\",\n",
+ "# \"Race\",\n",
+ "# \"Ethnicity\",\n",
+ "# \"Gender\",\n",
+ "# \"Tumor_Focality\",\n",
+ "# \"Tumor_Site\",\n",
+ "# ]\n",
+ "columns = [\n",
+ " \"Country\",\n",
+ " \"Path_Stage_Primary_Tumor-pT\",\n",
+ " \"Histologic_type\",\n",
+ " \"FIGO_stage\",\n",
+ " \"BMI\",\n",
+ " \"Age\",\n",
+ " \"Race\",\n",
+ " \"Ethnicity\",\n",
+ " \"Gender\",\n",
+ " \"Tumor_Focality\",\n",
+ " \"Tumor_Size_cm\",\n",
+ "]\n",
+ "# dataset = dataset[columns]\n",
+ "dataset.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " Histologic_Grade_FIGO | \n",
+ " Histologic_type | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " Clin_Stage_Dist_Mets-cM | \n",
+ " Path_Stage_Dist_Mets-pM | \n",
+ " tumor_Stage-Pathological | \n",
+ " FIGO_stage | \n",
+ " BMI | \n",
+ " Age | \n",
+ " Race | \n",
+ " Ethnicity | \n",
+ " Gender | \n",
+ " Tumor_Site | \n",
+ " Tumor_Focality | \n",
+ " Tumor_Size_cm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 38.88 | \n",
+ " 64.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Anterior endometrium | \n",
+ " Unifocal | \n",
+ " 2.9 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage IV | \n",
+ " IA | \n",
+ " 39.76 | \n",
+ " 58.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Posterior endometrium | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 51.19 | \n",
+ " 50.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " No pathologic evidence of distant metastasis | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 32.69 | \n",
+ " 75.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Ukraine | \n",
+ " FIGO grade 3 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 29.40 | \n",
+ " 75.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.2 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Ukraine | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT2 (FIGO II) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage II | \n",
+ " II | \n",
+ " 35.42 | \n",
+ " 74.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 1.5 | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " United States | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " pT2 (FIGO II) | \n",
+ " pN0 | \n",
+ " Staging Incomplete | \n",
+ " Staging Incomplete | \n",
+ " Stage II | \n",
+ " II | \n",
+ " 24.32 | \n",
+ " 85.0 | \n",
+ " Black or African American | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 3.8 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Ukraine | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " pT1a (FIGO IA) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 34.06 | \n",
+ " 70.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " Ukraine | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Country Histologic_Grade_FIGO Histologic_type \\\n",
+ "0 United States FIGO grade 1 Endometrioid \n",
+ "1 United States FIGO grade 1 Endometrioid \n",
+ "2 United States FIGO grade 2 Endometrioid \n",
+ "3 NaN NaN Carcinosarcoma \n",
+ "4 United States FIGO grade 2 Endometrioid \n",
+ ".. ... ... ... \n",
+ "99 Ukraine FIGO grade 3 Endometrioid \n",
+ "100 Ukraine FIGO grade 2 Endometrioid \n",
+ "101 United States NaN Serous \n",
+ "102 Ukraine NaN Serous \n",
+ "103 Ukraine NaN Serous \n",
+ "\n",
+ " Path_Stage_Primary_Tumor-pT Path_Stage_Reg_Lymph_Nodes-pN \\\n",
+ "0 pT1a (FIGO IA) pN0 \n",
+ "1 pT1a (FIGO IA) pNX \n",
+ "2 pT1a (FIGO IA) pN0 \n",
+ "3 NaN NaN \n",
+ "4 pT1a (FIGO IA) pNX \n",
+ ".. ... ... \n",
+ "99 pT1a (FIGO IA) pNX \n",
+ "100 pT2 (FIGO II) pN0 \n",
+ "101 pT2 (FIGO II) pN0 \n",
+ "102 pT1a (FIGO IA) pN0 \n",
+ "103 NaN NaN \n",
+ "\n",
+ " Clin_Stage_Dist_Mets-cM Path_Stage_Dist_Mets-pM \\\n",
+ "0 cM0 Staging Incomplete \n",
+ "1 cM0 Staging Incomplete \n",
+ "2 cM0 Staging Incomplete \n",
+ "3 NaN NaN \n",
+ "4 cM0 No pathologic evidence of distant metastasis \n",
+ ".. ... ... \n",
+ "99 cM0 Staging Incomplete \n",
+ "100 cM0 Staging Incomplete \n",
+ "101 Staging Incomplete Staging Incomplete \n",
+ "102 cM0 Staging Incomplete \n",
+ "103 NaN NaN \n",
+ "\n",
+ " tumor_Stage-Pathological FIGO_stage BMI Age \\\n",
+ "0 Stage I IA 38.88 64.0 \n",
+ "1 Stage IV IA 39.76 58.0 \n",
+ "2 Stage I IA 51.19 50.0 \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 Stage I IA 32.69 75.0 \n",
+ ".. ... ... ... ... \n",
+ "99 Stage I IA 29.40 75.0 \n",
+ "100 Stage II II 35.42 74.0 \n",
+ "101 Stage II II 24.32 85.0 \n",
+ "102 Stage I IA 34.06 70.0 \n",
+ "103 NaN NaN NaN NaN \n",
+ "\n",
+ " Race Ethnicity Gender \\\n",
+ "0 White Not-Hispanic or Latino Female \n",
+ "1 White Not-Hispanic or Latino Female \n",
+ "2 White Not-Hispanic or Latino Female \n",
+ "3 NaN NaN NaN \n",
+ "4 White Not-Hispanic or Latino Female \n",
+ ".. ... ... ... \n",
+ "99 NaN NaN Female \n",
+ "100 NaN NaN Female \n",
+ "101 Black or African American Not-Hispanic or Latino Female \n",
+ "102 NaN NaN Female \n",
+ "103 NaN NaN NaN \n",
+ "\n",
+ " Tumor_Site Tumor_Focality Tumor_Size_cm \n",
+ "0 Anterior endometrium Unifocal 2.9 \n",
+ "1 Posterior endometrium Unifocal 3.5 \n",
+ "2 Other, specify Unifocal 4.5 \n",
+ "3 NaN NaN NaN \n",
+ "4 Other, specify Unifocal 3.5 \n",
+ ".. ... ... ... \n",
+ "99 Other, specify Unifocal 4.2 \n",
+ "100 Other, specify Unifocal 1.5 \n",
+ "101 Other, specify Unifocal 3.8 \n",
+ "102 Other, specify Unifocal 5.0 \n",
+ "103 NaN NaN NaN \n",
+ "\n",
+ "[104 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Matching the table schema to the GDC standard vocabulary\n",
+ "\n",
+ "`bdi-kit` offers a suite of functions to help with data harmonization tasks.\n",
+ "For instance, it can automatically discover one-to-one mappings between the columns of the input (source) dataset and the columns of a target schema. The target schema can be either another table or a standard data vocabulary such as the GDC (Genomic Data Commons).\n",
+ "\n",
+ "To do this with `bdi-kit`, we call the `match_schema()` function, which matches the columns of our dataset to the GDC vocabulary schema, as follows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ " 0%| | 0/11 [00:00, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n",
+ "100%|██████████| 11/11 [00:01<00:00, 9.78it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 11 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 734/734 [00:55<00:00, 13.32it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 734 columns\n",
+ "\n",
+ "Source column: Country\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Federated States of Micronesia', similarity=0.541)]\n",
+ "source: Country target: country_of_birth score: 0.541\n",
+ "value_matches: [ValueMatch(current_value='Other_specify', target_value='other', similarity=0.508), ValueMatch(current_value='nan', target_value='american indian or alaska native', similarity=0.332), ValueMatch(current_value='United States', target_value='white', similarity=0.313), ValueMatch(current_value='Poland', target_value='native hawaiian or other pacific islander', similarity=0.27)]\n",
+ "source: Country target: race score: 1.423\n",
+ "value_matches: [ValueMatch(current_value='Poland', target_value='Andorra', similarity=0.317), ValueMatch(current_value='United States', target_value='Guatemala', similarity=0.261), ValueMatch(current_value='Other_specify', target_value='Jersey', similarity=0.253)]\n",
+ "source: Country target: country_of_residence_at_enrollment score: 0.8310000000000001\n",
+ "value_matches: [ValueMatch(current_value='Other_specify', target_value='Peripheral zone', similarity=0.288)]\n",
+ "source: Country target: zone_of_origin_prostate score: 0.288\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Indeterminate', similarity=0.325)]\n",
+ "source: Country target: non_nodal_regional_disease score: 0.325\n",
+ "value_matches: []\n",
+ "source: Country target: submission_enabled score: 0\n",
+ "value_matches: []\n",
+ "source: Country target: is_legacy score: 0\n",
+ "value_matches: []\n",
+ "source: Country target: oct_embedded score: 0\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Not Reported', similarity=0.325)]\n",
+ "source: Country target: perineural_invasion_present score: 0.325\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Indeterminate', similarity=0.325)]\n",
+ "source: Country target: ovarian_surface_involvement score: 0.325\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='not reported', similarity=0.311)]\n",
+ "source: Country target: ethnicity score: 0.311\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Not Reported', similarity=0.338)]\n",
+ "source: Country target: overrepresented_sequences score: 0.338\n",
+ "value_matches: []\n",
+ "source: Country target: released score: 0\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Not Reported', similarity=0.325)]\n",
+ "source: Country target: chemo_concurrent_to_radiation score: 0.325\n",
+ "value_matches: []\n",
+ "source: Country target: status score: 0\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Indeterminate', similarity=0.325)]\n",
+ "source: Country target: satellite_nodule_present score: 0.325\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Not Reported', similarity=0.315)]\n",
+ "source: Country target: ulceration_indicator score: 0.315\n",
+ "value_matches: []\n",
+ "source: Country target: request_submission score: 0\n",
+ "value_matches: []\n",
+ "source: Country target: consent_type score: 0\n",
+ "value_matches: [ValueMatch(current_value='United States', target_value='Not Reported', similarity=0.338)]\n",
+ "source: Country target: kmer_content score: 0.338\n",
+ "Top k reranked columns: [('Country', 'race', 1.423), ('Country', 'country_of_residence_at_enrollment', 0.8310000000000001), ('Country', 'country_of_birth', 0.541), ('Country', 'overrepresented_sequences', 0.338), ('Country', 'kmer_content', 0.338), ('Country', 'non_nodal_regional_disease', 0.325), ('Country', 'perineural_invasion_present', 0.325), ('Country', 'ovarian_surface_involvement', 0.325), ('Country', 'chemo_concurrent_to_radiation', 0.325), ('Country', 'satellite_nodule_present', 0.325), ('Country', 'ulceration_indicator', 0.315), ('Country', 'ethnicity', 0.311), ('Country', 'zone_of_origin_prostate', 0.288), ('Country', 'submission_enabled', 0), ('Country', 'is_legacy', 0), ('Country', 'oct_embedded', 0), ('Country', 'released', 0), ('Country', 'status', 0), ('Country', 'request_submission', 0), ('Country', 'consent_type', 0)]\n",
+ "\n",
+ "Source column: Path_Stage_Primary_Tumor-pT\n",
+ "value_matches: [ValueMatch(current_value='pT1a (FIGO IA)', target_value='T1a', similarity=0.563), ValueMatch(current_value='pT1 (FIGO I)', target_value='T1', similarity=0.341), ValueMatch(current_value='pT1b (FIGO IB)', target_value='T1', similarity=0.254)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: uicc_pathologic_t score: 1.158\n",
+ "value_matches: [ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='T3b', similarity=0.478), ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='T3a', similarity=0.468), ValueMatch(current_value='pT1b (FIGO IB)', target_value='T1b2', similarity=0.445), ValueMatch(current_value='pT2 (FIGO II)', target_value='T2', similarity=0.369), ValueMatch(current_value='pT1 (FIGO I)', target_value='T1', similarity=0.358), ValueMatch(current_value='pT1a (FIGO IA)', target_value='T1', similarity=0.273)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: ajcc_pathologic_t score: 2.391\n",
+ "value_matches: [ValueMatch(current_value='pT1 (FIGO I)', target_value='T1', similarity=0.424), ValueMatch(current_value='pT2 (FIGO II)', target_value='T2', similarity=0.424), ValueMatch(current_value='pT1a (FIGO IA)', target_value='T1', similarity=0.311), ValueMatch(current_value='pT1b (FIGO IB)', target_value='T1', similarity=0.302), ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='T3', similarity=0.26), ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='T3', similarity=0.256)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: ensat_pathologic_t score: 1.977\n",
+ "value_matches: [ValueMatch(current_value='pT1a (FIGO IA)', target_value='T1a', similarity=0.603), ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='T3a', similarity=0.46)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: uicc_clinical_t score: 1.063\n",
+ "value_matches: [ValueMatch(current_value='pT1b (FIGO IB)', target_value='T1b', similarity=0.622), ValueMatch(current_value='pT1 (FIGO I)', target_value='T1b', similarity=0.255)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: ajcc_clinical_t score: 0.877\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='nan', similarity=1.0)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: extrathyroid_extension score: 1.0\n",
+ "value_matches: [ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='Stage IIIA1', similarity=0.489), ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='Stage IIIA1', similarity=0.318), ValueMatch(current_value='pT1b (FIGO IB)', target_value='Stage IB', similarity=0.315), ValueMatch(current_value='pT2 (FIGO II)', target_value='Stage IIIC2', similarity=0.297), ValueMatch(current_value='pT1a (FIGO IA)', target_value='Stage IA1', similarity=0.272)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: uicc_pathologic_stage score: 1.6909999999999998\n",
+ "value_matches: []\n",
+ "source: Path_Stage_Primary_Tumor-pT target: somatic_mutation_indexes score: 0\n",
+ "value_matches: [ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='Stage IIIA', similarity=0.549), ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='Stage IIB', similarity=0.487), ValueMatch(current_value='pT1b (FIGO IB)', target_value='Stage IIB', similarity=0.324), ValueMatch(current_value='pT2 (FIGO II)', target_value='Stage III', similarity=0.314), ValueMatch(current_value='pT1a (FIGO IA)', target_value='Stage IIA1', similarity=0.292)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: ajcc_pathologic_stage score: 1.9660000000000002\n",
+ "value_matches: []\n",
+ "source: Path_Stage_Primary_Tumor-pT target: annotated_somatic_mutations score: 0\n",
+ "value_matches: []\n",
+ "source: Path_Stage_Primary_Tumor-pT target: masked_somatic_mutations score: 0\n",
+ "value_matches: [ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='Stage IIIB', similarity=0.554), ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='Stage IIIAii', similarity=0.524), ValueMatch(current_value='nan', target_value='Unknown', similarity=0.398), ValueMatch(current_value='pT2 (FIGO II)', target_value='Stage IIIAii', similarity=0.303), ValueMatch(current_value='pT1b (FIGO IB)', target_value='Stage IB1', similarity=0.294), ValueMatch(current_value='pT1a (FIGO IA)', target_value='Stage IA', similarity=0.291)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: figo_stage score: 2.364\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Transitional zone', similarity=0.297)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: tumor_level_prostate score: 0.297\n",
+ "value_matches: [ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='Stage III', similarity=0.434), ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='Stage III', similarity=0.363), ValueMatch(current_value='pT2 (FIGO II)', target_value='Stage III', similarity=0.304)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: ensat_pathologic_stage score: 1.101\n",
+ "value_matches: [ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='Stage IIIB', similarity=0.555), ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='Stage IIIA', similarity=0.523), ValueMatch(current_value='pT1b (FIGO IB)', target_value='Stage IIIB', similarity=0.301), ValueMatch(current_value='pT1a (FIGO IA)', target_value='Stage IA', similarity=0.261), ValueMatch(current_value='pT2 (FIGO II)', target_value='Stage IIIA', similarity=0.252)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: uicc_clinical_stage score: 1.8920000000000001\n",
+ "value_matches: []\n",
+ "source: Path_Stage_Primary_Tumor-pT target: tumor_grade_category score: 0\n",
+ "value_matches: [ValueMatch(current_value='pT3a (FIGO IIIA)', target_value='Stage IIIA', similarity=0.549), ValueMatch(current_value='pT3b (FIGO IIIB)', target_value='Stage III', similarity=0.397), ValueMatch(current_value='pT2 (FIGO II)', target_value='Stage III', similarity=0.328), ValueMatch(current_value='pT1a (FIGO IA)', target_value='Stage IIA', similarity=0.278), ValueMatch(current_value='pT1b (FIGO IB)', target_value='Stage IB2', similarity=0.262)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: ajcc_clinical_stage score: 1.814\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Unknown', similarity=0.345)]\n",
+ "source: Path_Stage_Primary_Tumor-pT target: inss_stage score: 0.345\n",
+ "value_matches: []\n",
+ "source: Path_Stage_Primary_Tumor-pT target: adapter_sequence score: 0\n",
+ "value_matches: []\n",
+ "source: Path_Stage_Primary_Tumor-pT target: margin_distance score: 0\n",
+ "Top k reranked columns: [('Path_Stage_Primary_Tumor-pT', 'ajcc_pathologic_t', 2.391), ('Path_Stage_Primary_Tumor-pT', 'figo_stage', 2.364), ('Path_Stage_Primary_Tumor-pT', 'ensat_pathologic_t', 1.977), ('Path_Stage_Primary_Tumor-pT', 'ajcc_pathologic_stage', 1.9660000000000002), ('Path_Stage_Primary_Tumor-pT', 'uicc_clinical_stage', 1.8920000000000001), ('Path_Stage_Primary_Tumor-pT', 'ajcc_clinical_stage', 1.814), ('Path_Stage_Primary_Tumor-pT', 'uicc_pathologic_stage', 1.6909999999999998), ('Path_Stage_Primary_Tumor-pT', 'uicc_pathologic_t', 1.158), ('Path_Stage_Primary_Tumor-pT', 'ensat_pathologic_stage', 1.101), ('Path_Stage_Primary_Tumor-pT', 'uicc_clinical_t', 1.063), ('Path_Stage_Primary_Tumor-pT', 'extrathyroid_extension', 1.0), ('Path_Stage_Primary_Tumor-pT', 'ajcc_clinical_t', 0.877), ('Path_Stage_Primary_Tumor-pT', 'inss_stage', 0.345), ('Path_Stage_Primary_Tumor-pT', 'tumor_level_prostate', 0.297), ('Path_Stage_Primary_Tumor-pT', 'somatic_mutation_indexes', 0), ('Path_Stage_Primary_Tumor-pT', 'annotated_somatic_mutations', 0), ('Path_Stage_Primary_Tumor-pT', 'masked_somatic_mutations', 0), ('Path_Stage_Primary_Tumor-pT', 'tumor_grade_category', 0), ('Path_Stage_Primary_Tumor-pT', 'adapter_sequence', 0), ('Path_Stage_Primary_Tumor-pT', 'margin_distance', 0)]\n",
+ "\n",
+ "Source column: Histologic_type\n",
+ "value_matches: [ValueMatch(current_value='Clear cell', target_value='Colorectal Cancer', similarity=0.313), ValueMatch(current_value='Carcinosarcoma', target_value='Phenochromocytoma or Paraganglioma', similarity=0.309)]\n",
+ "source: Histologic_type target: history_of_tumor_type score: 0.622\n",
+ "value_matches: [ValueMatch(current_value='Carcinosarcoma', target_value='Carcinomatous', similarity=0.695), ValueMatch(current_value='Serous', target_value='Fibrous', similarity=0.393), ValueMatch(current_value='Endometrioid', target_value='Myeloid', similarity=0.252)]\n",
+ "source: Histologic_type target: roots score: 1.34\n",
+ "value_matches: [ValueMatch(current_value='Endometrioid', target_value='Intermediate Risk', similarity=0.277)]\n",
+ "source: Histologic_type target: cog_rhabdomyosarcoma_risk_group score: 0.277\n",
+ "value_matches: [ValueMatch(current_value='Serous', target_value='Mucinous and Serous Neoplasms', similarity=0.615), ValueMatch(current_value='Clear cell', target_value='Germ Cell Neoplasms', similarity=0.371), ValueMatch(current_value='Carcinosarcoma', target_value='NOS', similarity=0.304)]\n",
+ "source: Histologic_type target: disease_type score: 1.29\n",
+ "value_matches: [ValueMatch(current_value='Serous', target_value='Ovarian serous carcinoma', similarity=0.576), ValueMatch(current_value='Carcinosarcoma', target_value='Basal cell carcinoma', similarity=0.472), ValueMatch(current_value='Clear cell', target_value='Renal cell carcinoma', similarity=0.44)]\n",
+ "source: Histologic_type target: described_cases score: 1.488\n",
+ "value_matches: [ValueMatch(current_value='Carcinosarcoma', target_value='Chromium scATAC v1 Library', similarity=0.27)]\n",
+ "source: Histologic_type target: single_cell_library score: 0.27\n",
+ "value_matches: []\n",
+ "source: Histologic_type target: sarcomatoid_present score: 0\n",
+ "value_matches: [ValueMatch(current_value='Endometrioid', target_value='Bilateral ovaries with endometriotic cyst and surface adhesions', similarity=0.476), ValueMatch(current_value='Carcinosarcoma', target_value='Keratinizing dysplasia; severe (carcinoma in situ)', similarity=0.43), ValueMatch(current_value='Clear cell', target_value='Diffuse and early nodular diabetic glomerulosclerosis', similarity=0.288), ValueMatch(current_value='Serous', target_value='Diffuse and early nodular diabetic glomerulosclerosis', similarity=0.258)]\n",
+ "source: Histologic_type target: additional_pathology_findings score: 1.452\n",
+ "value_matches: [ValueMatch(current_value='Carcinosarcoma', target_value='Osteosarcoma', similarity=0.584), ValueMatch(current_value='Clear cell', target_value='Squamous cell carcinoma', similarity=0.353), ValueMatch(current_value='Serous', target_value='Squamous cell carcinoma', similarity=0.259)]\n",
+ "source: Histologic_type target: pathology_details score: 1.196\n",
+ "value_matches: [ValueMatch(current_value='Carcinosarcoma', target_value='Osteosarcoma', similarity=0.611), ValueMatch(current_value='Clear cell', target_value='Basal cell carcinoma', similarity=0.441), ValueMatch(current_value='Serous', target_value='Squamous cell carcinoma', similarity=0.259)]\n",
+ "source: Histologic_type target: pathology_reports score: 1.311\n",
+ "value_matches: [ValueMatch(current_value='Carcinosarcoma', target_value='Kaposi Sarcoma', similarity=0.582), ValueMatch(current_value='Clear cell', target_value='Gallbladder Cancer', similarity=0.263)]\n",
+ "source: Histologic_type target: relationship_primary_diagnosis score: 0.845\n",
+ "value_matches: [ValueMatch(current_value='Endometrioid', target_value='Not Reported', similarity=0.257)]\n",
+ "source: Histologic_type target: chromosome score: 0.257\n",
+ "value_matches: []\n",
+ "source: Histologic_type target: histone_variant score: 0\n",
+ "value_matches: [ValueMatch(current_value='Carcinosarcoma', target_value='Carcinoma', similarity=0.783), ValueMatch(current_value='Endometrioid', target_value='Endometrial stromal nodule', similarity=0.564), ValueMatch(current_value='Serous', target_value='Pseudomucinous cystadenoma', similarity=0.256)]\n",
+ "source: Histologic_type target: primary_diagnosis score: 1.603\n",
+ "value_matches: [ValueMatch(current_value='Clear cell', target_value='Buccal Cells', similarity=0.412), ValueMatch(current_value='Endometrioid', target_value='2D Modified Conditionally Reprogrammed Cells', similarity=0.292)]\n",
+ "source: Histologic_type target: composition score: 0.704\n",
+ "value_matches: [ValueMatch(current_value='Clear cell', target_value='Mononuclear Cells from Bone Marrow', similarity=0.491), ValueMatch(current_value='Serous', target_value='Serum', similarity=0.371), ValueMatch(current_value='Carcinosarcoma', target_value='Mononuclear Cells from Bone Marrow', similarity=0.262)]\n",
+ "source: Histologic_type target: specimen_type score: 1.124\n",
+ "value_matches: []\n",
+ "source: Histologic_type target: icd_10_code score: 0\n",
+ "value_matches: [ValueMatch(current_value='Clear cell', target_value='Likely Pathogenic', similarity=0.252)]\n",
+ "source: Histologic_type target: pathogenicity score: 0.252\n",
+ "value_matches: []\n",
+ "source: Histologic_type target: papillary_renal_cell_type score: 0\n",
+ "value_matches: []\n",
+ "source: Histologic_type target: antigen score: 0\n",
+ "Top k reranked columns: [('Histologic_type', 'primary_diagnosis', 1.603), ('Histologic_type', 'described_cases', 1.488), ('Histologic_type', 'additional_pathology_findings', 1.452), ('Histologic_type', 'roots', 1.34), ('Histologic_type', 'pathology_reports', 1.311), ('Histologic_type', 'disease_type', 1.29), ('Histologic_type', 'pathology_details', 1.196), ('Histologic_type', 'specimen_type', 1.124), ('Histologic_type', 'relationship_primary_diagnosis', 0.845), ('Histologic_type', 'composition', 0.704), ('Histologic_type', 'history_of_tumor_type', 0.622), ('Histologic_type', 'cog_rhabdomyosarcoma_risk_group', 0.277), ('Histologic_type', 'single_cell_library', 0.27), ('Histologic_type', 'chromosome', 0.257), ('Histologic_type', 'pathogenicity', 0.252), ('Histologic_type', 'sarcomatoid_present', 0), ('Histologic_type', 'histone_variant', 0), ('Histologic_type', 'icd_10_code', 0), ('Histologic_type', 'papillary_renal_cell_type', 0), ('Histologic_type', 'antigen', 0)]\n",
+ "\n",
+ "Source column: FIGO_stage\n",
+ "value_matches: [ValueMatch(current_value='IIIC1', target_value='Stage IIIC1', similarity=0.818), ValueMatch(current_value='IIIB', target_value='Stage IIIB', similarity=0.76), ValueMatch(current_value='IIIA', target_value='Stage IIIAii', similarity=0.736), ValueMatch(current_value='IVB', target_value='Stage IVB', similarity=0.718), ValueMatch(current_value='II', target_value='Stage IIIAii', similarity=0.628), ValueMatch(current_value='IIIC2', target_value='Stage IC2', similarity=0.555), ValueMatch(current_value='IA', target_value='Stage IA', similarity=0.539), ValueMatch(current_value='IB', target_value='Stage IIIB', similarity=0.456), ValueMatch(current_value='nan', target_value='Unknown', similarity=0.394)]\n",
+ "source: FIGO_stage target: figo_stage score: 5.604\n",
+ "value_matches: [ValueMatch(current_value='IIIA', target_value='Stage IIIA', similarity=0.774), ValueMatch(current_value='IVB', target_value='Stage IVB', similarity=0.742), ValueMatch(current_value='II', target_value='Stage III', similarity=0.636), ValueMatch(current_value='IIIC2', target_value='Stage IIIC', similarity=0.62), ValueMatch(current_value='IIIC1', target_value='Stage IIIC', similarity=0.604), ValueMatch(current_value='IA', target_value='Stage IIA', similarity=0.532), ValueMatch(current_value='IIIB', target_value='Stage III', similarity=0.511), ValueMatch(current_value='IB', target_value='Stage IB2', similarity=0.427)]\n",
+ "source: FIGO_stage target: ajcc_clinical_stage score: 4.846\n",
+ "value_matches: [ValueMatch(current_value='IIIC2', target_value='Stage IIIC2', similarity=0.84), ValueMatch(current_value='IVB', target_value='Stage IVB', similarity=0.727), ValueMatch(current_value='IIIA', target_value='Stage IIIA1', similarity=0.689), ValueMatch(current_value='IIIC1', target_value='Stage IIIC2', similarity=0.539), ValueMatch(current_value='IA', target_value='Stage IA', similarity=0.527), ValueMatch(current_value='IB', target_value='Stage IB', similarity=0.518), ValueMatch(current_value='II', target_value='Stage IIIA1', similarity=0.494), ValueMatch(current_value='IIIB', target_value='Stage IIIA1', similarity=0.408)]\n",
+ "source: FIGO_stage target: uicc_pathologic_stage score: 4.742\n",
+ "value_matches: [ValueMatch(current_value='IIIB', target_value='Stage IIIB', similarity=0.771), ValueMatch(current_value='IIIA', target_value='Stage IIIA', similarity=0.757), ValueMatch(current_value='IVB', target_value='Stage IVB', similarity=0.741), ValueMatch(current_value='IIIC1', target_value='Stage IIC1', similarity=0.738), ValueMatch(current_value='IIIC2', target_value='Stage IIIC', similarity=0.561), ValueMatch(current_value='II', target_value='Stage IIIA', similarity=0.532), ValueMatch(current_value='IA', target_value='Stage IA', similarity=0.518), ValueMatch(current_value='IB', target_value='Stage IIIB', similarity=0.488)]\n",
+ "source: FIGO_stage target: uicc_clinical_stage score: 5.106\n",
+ "value_matches: [ValueMatch(current_value='IIIA', target_value='Group IIIa', similarity=0.739), ValueMatch(current_value='IIIB', target_value='Group IIIb', similarity=0.739), ValueMatch(current_value='II', target_value='Group IIIb', similarity=0.518), ValueMatch(current_value='IA', target_value='Group Ia', similarity=0.485), ValueMatch(current_value='IB', target_value='Group Ib', similarity=0.485), ValueMatch(current_value='IIIC1', target_value='Group IIc', similarity=0.47), ValueMatch(current_value='IIIC2', target_value='Group IIc', similarity=0.47), ValueMatch(current_value='IVB', target_value='Group IV', similarity=0.378)]\n",
+ "source: FIGO_stage target: irs_group score: 4.284\n",
+ "value_matches: [ValueMatch(current_value='IIIA', target_value='Stage IIIA', similarity=0.761), ValueMatch(current_value='IVB', target_value='Stage IVB', similarity=0.725), ValueMatch(current_value='IIIB', target_value='Stage IIB', similarity=0.626), ValueMatch(current_value='II', target_value='Stage III', similarity=0.617), ValueMatch(current_value='IB', target_value='Stage IIB', similarity=0.509), ValueMatch(current_value='IA', target_value='Stage IIIA', similarity=0.507), ValueMatch(current_value='IIIC1', target_value='Stage III', similarity=0.399), ValueMatch(current_value='IIIC2', target_value='Stage III', similarity=0.389)]\n",
+ "source: FIGO_stage target: ajcc_pathologic_stage score: 4.533\n",
+ "value_matches: [ValueMatch(current_value='II', target_value='II', similarity=1.0), ValueMatch(current_value='IIIB', target_value='III', similarity=0.752), ValueMatch(current_value='IIIA', target_value='III', similarity=0.752), ValueMatch(current_value='IIIC1', target_value='III', similarity=0.606), ValueMatch(current_value='IIIC2', target_value='III', similarity=0.606), ValueMatch(current_value='IA', target_value='I', similarity=0.331), ValueMatch(current_value='IB', target_value='I', similarity=0.331), ValueMatch(current_value='nan', target_value='Unknown', similarity=0.327)]\n",
+ "source: FIGO_stage target: iss_stage score: 4.705\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Unknown', similarity=0.344)]\n",
+ "source: FIGO_stage target: inss_stage score: 0.344\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Unknown', similarity=0.33), ValueMatch(current_value='IIIC1', target_value='1', similarity=0.281), ValueMatch(current_value='IIIC2', target_value='2', similarity=0.281)]\n",
+ "source: FIGO_stage target: irs_stage score: 0.892\n",
+ "value_matches: [ValueMatch(current_value='II', target_value='Stage III', similarity=0.544), ValueMatch(current_value='IIIA', target_value='Stage III', similarity=0.502), ValueMatch(current_value='IIIB', target_value='Stage III', similarity=0.443), ValueMatch(current_value='IIIC1', target_value='Stage III', similarity=0.357), ValueMatch(current_value='IIIC2', target_value='Stage III', similarity=0.357), ValueMatch(current_value='nan', target_value='Unknown', similarity=0.338), ValueMatch(current_value='IVB', target_value='Stage IV', similarity=0.299)]\n",
+ "source: FIGO_stage target: cog_liver_stage score: 2.8400000000000003\n",
+ "value_matches: []\n",
+ "source: FIGO_stage target: shortest_dimension score: 0\n",
+ "value_matches: [ValueMatch(current_value='IB', target_value='GB', similarity=0.313), ValueMatch(current_value='nan', target_value='Unknown', similarity=0.31)]\n",
+ "source: FIGO_stage target: tumor_grade score: 0.623\n",
+ "value_matches: []\n",
+ "source: FIGO_stage target: fragment_minimum_length score: 0\n",
+ "value_matches: [ValueMatch(current_value='IVB', target_value='Stage IVb', similarity=0.665), ValueMatch(current_value='IIIA', target_value='Stage IIa', similarity=0.573), ValueMatch(current_value='IIIB', target_value='Stage IIb', similarity=0.564), ValueMatch(current_value='II', target_value='Stage III', similarity=0.56), ValueMatch(current_value='IA', target_value='Stage IIa', similarity=0.507), ValueMatch(current_value='IB', target_value='Stage IIb', similarity=0.458), ValueMatch(current_value='IIIC2', target_value='Stage III', similarity=0.353), ValueMatch(current_value='IIIC1', target_value='Stage III', similarity=0.353)]\n",
+ "source: FIGO_stage target: masaoka_stage score: 4.033\n",
+ "value_matches: [ValueMatch(current_value='II', target_value='Stage III', similarity=0.544), ValueMatch(current_value='IIIA', target_value='Stage III', similarity=0.502), ValueMatch(current_value='IIIB', target_value='Stage III', similarity=0.443), ValueMatch(current_value='IIIC1', target_value='Stage III', similarity=0.357), ValueMatch(current_value='IIIC2', target_value='Stage III', similarity=0.357), ValueMatch(current_value='nan', target_value='Unknown', similarity=0.338), ValueMatch(current_value='IVB', target_value='Stage IV', similarity=0.299)]\n",
+ "source: FIGO_stage target: cog_renal_stage score: 2.8400000000000003\n",
+ "value_matches: []\n",
+ "source: FIGO_stage target: fragment_maximum_length score: 0\n",
+ "value_matches: [ValueMatch(current_value='II', target_value='Stage III', similarity=0.544), ValueMatch(current_value='IIIA', target_value='Stage III', similarity=0.502), ValueMatch(current_value='IIIB', target_value='Stage III', similarity=0.443), ValueMatch(current_value='IIIC1', target_value='Stage III', similarity=0.357), ValueMatch(current_value='IIIC2', target_value='Stage III', similarity=0.357), ValueMatch(current_value='nan', target_value='Unknown', similarity=0.338), ValueMatch(current_value='IVB', target_value='Stage IV', similarity=0.299)]\n",
+ "source: FIGO_stage target: ann_arbor_clinical_stage score: 2.8400000000000003\n",
+ "value_matches: []\n",
+ "source: FIGO_stage target: fragment_mean_length score: 0\n",
+ "value_matches: [ValueMatch(current_value='II', target_value='Class III', similarity=0.541), ValueMatch(current_value='IIIA', target_value='Class III', similarity=0.492), ValueMatch(current_value='IIIB', target_value='Class III', similarity=0.437), ValueMatch(current_value='IIIC1', target_value='Class III', similarity=0.397), ValueMatch(current_value='IIIC2', target_value='Class III', similarity=0.397), ValueMatch(current_value='IVB', target_value='Class IV', similarity=0.281)]\n",
+ "source: FIGO_stage target: myasthenia_gravis_classification score: 2.5450000000000004\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Unknown', similarity=0.33)]\n",
+ "source: FIGO_stage target: who_nte_grade score: 0.33\n",
+ "Top k reranked columns: [('FIGO_stage', 'figo_stage', 5.604), ('FIGO_stage', 'uicc_clinical_stage', 5.106), ('FIGO_stage', 'ajcc_clinical_stage', 4.846), ('FIGO_stage', 'uicc_pathologic_stage', 4.742), ('FIGO_stage', 'iss_stage', 4.705), ('FIGO_stage', 'ajcc_pathologic_stage', 4.533), ('FIGO_stage', 'irs_group', 4.284), ('FIGO_stage', 'masaoka_stage', 4.033), ('FIGO_stage', 'cog_liver_stage', 2.8400000000000003), ('FIGO_stage', 'cog_renal_stage', 2.8400000000000003), ('FIGO_stage', 'ann_arbor_clinical_stage', 2.8400000000000003), ('FIGO_stage', 'myasthenia_gravis_classification', 2.5450000000000004), ('FIGO_stage', 'irs_stage', 0.892), ('FIGO_stage', 'tumor_grade', 0.623), ('FIGO_stage', 'inss_stage', 0.344), ('FIGO_stage', 'who_nte_grade', 0.33), ('FIGO_stage', 'shortest_dimension', 0), ('FIGO_stage', 'fragment_minimum_length', 0), ('FIGO_stage', 'fragment_maximum_length', 0), ('FIGO_stage', 'fragment_mean_length', 0)]\n",
+ "\n",
+ "Source column: BMI\n",
+ "value_matches: [ValueMatch(current_value='28.84', target_value='28.8', similarity=0.789), ValueMatch(current_value='35.42', target_value='35.4', similarity=0.78), ValueMatch(current_value='38.88', target_value='28.8', similarity=0.53), ValueMatch(current_value='32.32', target_value='33.2', similarity=0.43), ValueMatch(current_value='23.88', target_value='28.8', similarity=0.398), ValueMatch(current_value='44.0', target_value='40.2', similarity=0.383), ValueMatch(current_value='28.5', target_value='28.8', similarity=0.377), ValueMatch(current_value='17.11', target_value='31.7', similarity=0.377), ValueMatch(current_value='34.0', target_value='40.2', similarity=0.377), ValueMatch(current_value='20.55', target_value='30.5', similarity=0.372), ValueMatch(current_value='26.22', target_value='22.3', similarity=0.357), ValueMatch(current_value='38.89', target_value='28.8', similarity=0.356), ValueMatch(current_value='36.2', target_value='23.6', similarity=0.353), ValueMatch(current_value='35.0', target_value='35.4', similarity=0.353), ValueMatch(current_value='20.28', target_value='40.2', similarity=0.347), ValueMatch(current_value='39.14', target_value='29.1', similarity=0.344), ValueMatch(current_value='46.64', target_value='26.4', similarity=0.34), ValueMatch(current_value='38.54', target_value='35.4', similarity=0.34), ValueMatch(current_value='33.65', target_value='33.2', similarity=0.336), ValueMatch(current_value='43.0', target_value='30.5', similarity=0.331), ValueMatch(current_value='31.22', target_value='22.3', similarity=0.33), ValueMatch(current_value='55.86', target_value='25.8', similarity=0.318), ValueMatch(current_value='26.0', target_value='26.4', similarity=0.316), ValueMatch(current_value='25.68', target_value='25.8', similarity=0.315), ValueMatch(current_value='31.83', target_value='18.5', similarity=0.314), ValueMatch(current_value='21.83', target_value='18.5', similarity=0.314), ValueMatch(current_value='46.45', target_value='26.4', 
similarity=0.314), ValueMatch(current_value='17.85', target_value='18.5', similarity=0.313), ValueMatch(current_value='46.41', target_value='26.4', similarity=0.309), ValueMatch(current_value='22.86', target_value='28.8', similarity=0.308), ValueMatch(current_value='17.64', target_value='31.7', similarity=0.306), ValueMatch(current_value='30.85', target_value='30.5', similarity=0.305), ValueMatch(current_value='24.32', target_value='24.9', similarity=0.305), ValueMatch(current_value='32.83', target_value='33.2', similarity=0.305), ValueMatch(current_value='34.26', target_value='26.4', similarity=0.304), ValueMatch(current_value='36.0', target_value='23.6', similarity=0.299), ValueMatch(current_value='29.4', target_value='29.1', similarity=0.299), ValueMatch(current_value='27.1', target_value='27.5', similarity=0.298), ValueMatch(current_value='40.72', target_value='40.2', similarity=0.295), ValueMatch(current_value='45.83', target_value='25.8', similarity=0.292), ValueMatch(current_value='29.37', target_value='37.9', similarity=0.291), ValueMatch(current_value='37.69', target_value='37.9', similarity=0.29), ValueMatch(current_value='31.0', target_value='31.7', similarity=0.288), ValueMatch(current_value='27.31', target_value='31.7', similarity=0.286), ValueMatch(current_value='31.58', target_value='25.8', similarity=0.285), ValueMatch(current_value='26.14', target_value='26.4', similarity=0.284), ValueMatch(current_value='34.06', target_value='40.2', similarity=0.279), ValueMatch(current_value='27.0', target_value='27.5', similarity=0.276), ValueMatch(current_value='25.37', target_value='25.8', similarity=0.274), ValueMatch(current_value='29.62', target_value='29.1', similarity=0.265), ValueMatch(current_value='27.82', target_value='27.5', similarity=0.262), ValueMatch(current_value='30.48', target_value='30.5', similarity=0.262), ValueMatch(current_value='46.85', target_value='18.5', similarity=0.26), ValueMatch(current_value='25.03', target_value='25.8', 
similarity=0.258), ValueMatch(current_value='34.37', target_value='37.9', similarity=0.258), ValueMatch(current_value='30.47', target_value='30.5', similarity=0.257), ValueMatch(current_value='29.52', target_value='29.1', similarity=0.254)]\n",
+ "source: BMI target: bmi score: 19.058\n",
+ "value_matches: [ValueMatch(current_value='31.22', target_value='31.2', similarity=0.785), ValueMatch(current_value='25.37', target_value='25.3', similarity=0.773), ValueMatch(current_value='30.48', target_value='30.4', similarity=0.771), ValueMatch(current_value='27.82', target_value='27.8', similarity=0.767), ValueMatch(current_value='34.53', target_value='34.5', similarity=0.767), ValueMatch(current_value='27.83', target_value='27.8', similarity=0.763), ValueMatch(current_value='38.44', target_value='38.4', similarity=0.76), ValueMatch(current_value='30.47', target_value='30.4', similarity=0.757), ValueMatch(current_value='38.41', target_value='38.4', similarity=0.732), ValueMatch(current_value='32.69', target_value='32.6', similarity=0.728), ValueMatch(current_value='32.32', target_value='32.6', similarity=0.416), ValueMatch(current_value='43.0', target_value='30.4', similarity=0.41), ValueMatch(current_value='44.0', target_value='40.1', similarity=0.367), ValueMatch(current_value='34.0', target_value='40.1', similarity=0.364), ValueMatch(current_value='34.84', target_value='38.4', similarity=0.362), ValueMatch(current_value='45.53', target_value='34.5', similarity=0.359), ValueMatch(current_value='26.22', target_value='22.5', similarity=0.357), ValueMatch(current_value='29.4', target_value='29.9', similarity=0.349), ValueMatch(current_value='27.1', target_value='27.8', similarity=0.34), ValueMatch(current_value='35.0', target_value='35.2', similarity=0.339), ValueMatch(current_value='26.0', target_value='32.6', similarity=0.334), ValueMatch(current_value='33.65', target_value='33.7', similarity=0.329), ValueMatch(current_value='46.45', target_value='34.5', similarity=0.324), ValueMatch(current_value='24.32', target_value='24.1', similarity=0.324), ValueMatch(current_value='29.52', target_value='35.2', similarity=0.324), ValueMatch(current_value='41.44', target_value='24.1', similarity=0.324), ValueMatch(current_value='47.82', target_value='27.8', 
similarity=0.322), ValueMatch(current_value='48.46', target_value='38.4', similarity=0.316), ValueMatch(current_value='28.84', target_value='38.4', similarity=0.316), ValueMatch(current_value='27.0', target_value='27.8', similarity=0.315), ValueMatch(current_value='45.83', target_value='34.5', similarity=0.314), ValueMatch(current_value='36.84', target_value='38.4', similarity=0.313), ValueMatch(current_value='46.41', target_value='24.1', similarity=0.311), ValueMatch(current_value='35.42', target_value='35.2', similarity=0.307), ValueMatch(current_value='34.37', target_value='33.7', similarity=0.307), ValueMatch(current_value='34.26', target_value='32.6', similarity=0.306), ValueMatch(current_value='22.86', target_value='22.5', similarity=0.304), ValueMatch(current_value='28.5', target_value='28.7', similarity=0.3), ValueMatch(current_value='29.62', target_value='29.9', similarity=0.3), ValueMatch(current_value='38.88', target_value='38.4', similarity=0.292), ValueMatch(current_value='23.88', target_value='38.4', similarity=0.292), ValueMatch(current_value='38.89', target_value='38.4', similarity=0.291), ValueMatch(current_value='31.0', target_value='31.2', similarity=0.29), ValueMatch(current_value='25.03', target_value='25.3', similarity=0.287), ValueMatch(current_value='38.54', target_value='38.4', similarity=0.287), ValueMatch(current_value='32.06', target_value='32.6', similarity=0.287), ValueMatch(current_value='29.37', target_value='37.9', similarity=0.284), ValueMatch(current_value='25.68', target_value='22.5', similarity=0.283), ValueMatch(current_value='17.85', target_value='27.8', similarity=0.282), ValueMatch(current_value='37.69', target_value='37.9', similarity=0.282), ValueMatch(current_value='21.45', target_value='34.5', similarity=0.28), ValueMatch(current_value='42.98', target_value='29.9', similarity=0.276), ValueMatch(current_value='32.83', target_value='32.6', similarity=0.274), ValueMatch(current_value='30.85', target_value='30.4', 
similarity=0.27), ValueMatch(current_value='59.78', target_value='27.8', similarity=0.27), ValueMatch(current_value='34.06', target_value='40.1', similarity=0.268), ValueMatch(current_value='29.07', target_value='29.9', similarity=0.267), ValueMatch(current_value='27.31', target_value='31.2', similarity=0.265), ValueMatch(current_value='31.83', target_value='31.2', similarity=0.261), ValueMatch(current_value='26.14', target_value='32.6', similarity=0.256), ValueMatch(current_value='40.72', target_value='40.1', similarity=0.253)]\n",
+ "source: BMI target: average_base_quality score: 23.353\n",
+ "value_matches: [ValueMatch(current_value='29.4', target_value='2.94', similarity=1.0), ValueMatch(current_value='31.63', target_value='1.63', similarity=0.816), ValueMatch(current_value='32.06', target_value='2.06', similarity=0.788), ValueMatch(current_value='26.14', target_value='2.61', similarity=0.775), ValueMatch(current_value='32.83', target_value='2.83', similarity=0.771), ValueMatch(current_value='22.86', target_value='2.28', similarity=0.756), ValueMatch(current_value='25.37', target_value='2.5', similarity=0.461), ValueMatch(current_value='25.68', target_value='2.5', similarity=0.454), ValueMatch(current_value='25.03', target_value='2.5', similarity=0.436), ValueMatch(current_value='20.55', target_value='3.05', similarity=0.374), ValueMatch(current_value='26.22', target_value='2.28', similarity=0.372), ValueMatch(current_value='39.2', target_value='2.39', similarity=0.369), ValueMatch(current_value='51.19', target_value='1.95', similarity=0.356), ValueMatch(current_value='28.5', target_value='2.28', similarity=0.356), ValueMatch(current_value='17.11', target_value='2.17', similarity=0.349), ValueMatch(current_value='27.1', target_value='2.72', similarity=0.347), ValueMatch(current_value='34.06', target_value='2.06', similarity=0.334), ValueMatch(current_value='43.0', target_value='3.05', similarity=0.332), ValueMatch(current_value='27.82', target_value='2.72', similarity=0.332), ValueMatch(current_value='21.57', target_value='1.52', similarity=0.324), ValueMatch(current_value='34.72', target_value='2.72', similarity=0.324), ValueMatch(current_value='29.52', target_value='1.52', similarity=0.322), ValueMatch(current_value='27.0', target_value='2.72', similarity=0.32), ValueMatch(current_value='20.28', target_value='2.28', similarity=0.318), ValueMatch(current_value='31.22', target_value='2.28', similarity=0.316), ValueMatch(current_value='21.83', target_value='1.85', similarity=0.313), ValueMatch(current_value='17.85', target_value='1.85', 
similarity=0.311), ValueMatch(current_value='28.84', target_value='2.28', similarity=0.311), ValueMatch(current_value='40.72', target_value='2.72', similarity=0.31), ValueMatch(current_value='31.83', target_value='1.85', similarity=0.309), ValueMatch(current_value='26.0', target_value='2.61', similarity=0.308), ValueMatch(current_value='30.85', target_value='3.05', similarity=0.306), ValueMatch(current_value='17.64', target_value='1.74', similarity=0.302), ValueMatch(current_value='27.83', target_value='2.83', similarity=0.301), ValueMatch(current_value='31.58', target_value='1.52', similarity=0.297), ValueMatch(current_value='32.32', target_value='2.39', similarity=0.29), ValueMatch(current_value='42.19', target_value='1.95', similarity=0.289), ValueMatch(current_value='42.98', target_value='2.94', similarity=0.288), ValueMatch(current_value='31.96', target_value='1.95', similarity=0.286), ValueMatch(current_value='29.62', target_value='2.94', similarity=0.275), ValueMatch(current_value='68.39', target_value='2.39', similarity=0.274), ValueMatch(current_value='23.88', target_value='2.39', similarity=0.27), ValueMatch(current_value='39.76', target_value='2.39', similarity=0.265), ValueMatch(current_value='30.48', target_value='3.05', similarity=0.263), ValueMatch(current_value='27.31', target_value='2.72', similarity=0.261), ValueMatch(current_value='46.85', target_value='1.85', similarity=0.261), ValueMatch(current_value='21.45', target_value='2.17', similarity=0.261), ValueMatch(current_value='39.14', target_value='2.39', similarity=0.259), ValueMatch(current_value='38.54', target_value='1.85', similarity=0.258), ValueMatch(current_value='30.47', target_value='3.05', similarity=0.258), ValueMatch(current_value='34.26', target_value='2.61', similarity=0.252), ValueMatch(current_value='27.66', target_value='2.72', similarity=0.251), ValueMatch(current_value='45.83', target_value='2.83', similarity=0.25)]\n",
+ "source: BMI target: body_surface_area score: 19.580999999999996\n",
+ "value_matches: [ValueMatch(current_value='34.06', target_value='40.6', similarity=0.814), ValueMatch(current_value='42.19', target_value='21.9', similarity=0.78), ValueMatch(current_value='30.85', target_value='30.8', similarity=0.78), ValueMatch(current_value='27.1', target_value='7.1', similarity=0.606), ValueMatch(current_value='17.11', target_value='7.1', similarity=0.569), ValueMatch(current_value='37.11', target_value='7.1', similarity=0.538), ValueMatch(current_value='34.84', target_value='4.8', similarity=0.535), ValueMatch(current_value='48.46', target_value='4.8', similarity=0.515), ValueMatch(current_value='29.37', target_value='9.3', similarity=0.509), ValueMatch(current_value='44.81', target_value='4.8', similarity=0.486), ValueMatch(current_value='25.37', target_value='2.5', similarity=0.468), ValueMatch(current_value='25.68', target_value='2.5', similarity=0.459), ValueMatch(current_value='65.71', target_value='7.1', similarity=0.453), ValueMatch(current_value='34.89', target_value='4.8', similarity=0.451), ValueMatch(current_value='25.03', target_value='2.5', similarity=0.441), ValueMatch(current_value='30.48', target_value='4.8', similarity=0.438), ValueMatch(current_value='71.09', target_value='7.1', similarity=0.425), ValueMatch(current_value='44.0', target_value='40.6', similarity=0.392), ValueMatch(current_value='24.32', target_value='24.2', similarity=0.39), ValueMatch(current_value='34.0', target_value='40.6', similarity=0.388), ValueMatch(current_value='43.0', target_value='30.8', similarity=0.346), ValueMatch(current_value='41.44', target_value='34.1', similarity=0.344), ValueMatch(current_value='27.31', target_value='37.3', similarity=0.34), ValueMatch(current_value='38.41', target_value='34.1', similarity=0.339), ValueMatch(current_value='51.19', target_value='21.9', similarity=0.336), ValueMatch(current_value='31.22', target_value='12.0', similarity=0.335), ValueMatch(current_value='46.41', target_value='34.1', similarity=0.33), 
ValueMatch(current_value='32.06', target_value='40.6', similarity=0.327), ValueMatch(current_value='34.37', target_value='37.3', similarity=0.325), ValueMatch(current_value='31.96', target_value='21.9', similarity=0.312), ValueMatch(current_value='38.44', target_value='43.8', similarity=0.311), ValueMatch(current_value='31.58', target_value='15.4', similarity=0.304), ValueMatch(current_value='21.83', target_value='18.6', similarity=0.303), ValueMatch(current_value='31.83', target_value='18.6', similarity=0.3), ValueMatch(current_value='21.57', target_value='15.4', similarity=0.299), ValueMatch(current_value='38.54', target_value='15.4', similarity=0.298), ValueMatch(current_value='20.28', target_value='12.0', similarity=0.296), ValueMatch(current_value='35.42', target_value='15.4', similarity=0.288), ValueMatch(current_value='22.86', target_value='18.6', similarity=0.281), ValueMatch(current_value='34.26', target_value='24.2', similarity=0.281), ValueMatch(current_value='27.0', target_value='27.5', similarity=0.279), ValueMatch(current_value='55.86', target_value='18.6', similarity=0.271), ValueMatch(current_value='40.72', target_value='40.6', similarity=0.269), ValueMatch(current_value='23.88', target_value='43.8', similarity=0.268), ValueMatch(current_value='38.89', target_value='43.8', similarity=0.268), ValueMatch(current_value='21.45', target_value='21.9', similarity=0.267), ValueMatch(current_value='38.88', target_value='43.8', similarity=0.267), ValueMatch(current_value='27.82', target_value='27.5', similarity=0.267), ValueMatch(current_value='30.47', target_value='30.8', similarity=0.265), ValueMatch(current_value='42.98', target_value='24.2', similarity=0.262), ValueMatch(current_value='37.69', target_value='37.3', similarity=0.256), ValueMatch(current_value='20.55', target_value='12.0', similarity=0.255)]\n",
+ "source: BMI target: intermediate_dimension score: 19.926\n",
+ "value_matches: [ValueMatch(current_value='24.32', target_value='43.2', similarity=0.759), ValueMatch(current_value='21.57', target_value='21.5', similarity=0.755), ValueMatch(current_value='27.1', target_value='7.1', similarity=0.603), ValueMatch(current_value='17.11', target_value='7.1', similarity=0.57), ValueMatch(current_value='37.11', target_value='7.1', similarity=0.536), ValueMatch(current_value='34.84', target_value='4.8', similarity=0.535), ValueMatch(current_value='48.46', target_value='4.8', similarity=0.519), ValueMatch(current_value='29.37', target_value='9.3', similarity=0.506), ValueMatch(current_value='44.81', target_value='4.8', similarity=0.487), ValueMatch(current_value='25.37', target_value='2.5', similarity=0.462), ValueMatch(current_value='25.68', target_value='2.5', similarity=0.46), ValueMatch(current_value='65.71', target_value='7.1', similarity=0.458), ValueMatch(current_value='34.89', target_value='4.8', similarity=0.454), ValueMatch(current_value='25.03', target_value='2.5', similarity=0.437), ValueMatch(current_value='30.48', target_value='4.8', similarity=0.435), ValueMatch(current_value='71.09', target_value='7.1', similarity=0.426), ValueMatch(current_value='32.32', target_value='43.2', similarity=0.402), ValueMatch(current_value='38.89', target_value='28.9', similarity=0.35), ValueMatch(current_value='43.0', target_value='43.2', similarity=0.35), ValueMatch(current_value='21.83', target_value='18.2', similarity=0.344), ValueMatch(current_value='35.0', target_value='35.6', similarity=0.342), ValueMatch(current_value='46.45', target_value='46.5', similarity=0.338), ValueMatch(current_value='42.98', target_value='39.8', similarity=0.335), ValueMatch(current_value='31.22', target_value='12.0', similarity=0.331), ValueMatch(current_value='55.67', target_value='35.6', similarity=0.324), ValueMatch(current_value='39.2', target_value='39.8', similarity=0.319), ValueMatch(current_value='27.82', target_value='18.2', similarity=0.318), 
ValueMatch(current_value='21.45', target_value='21.5', similarity=0.317), ValueMatch(current_value='28.5', target_value='28.9', similarity=0.314), ValueMatch(current_value='47.82', target_value='24.7', similarity=0.313), ValueMatch(current_value='34.72', target_value='24.7', similarity=0.311), ValueMatch(current_value='46.64', target_value='46.5', similarity=0.31), ValueMatch(current_value='34.37', target_value='43.2', similarity=0.31), ValueMatch(current_value='68.39', target_value='39.8', similarity=0.31), ValueMatch(current_value='31.58', target_value='21.5', similarity=0.307), ValueMatch(current_value='46.0', target_value='46.5', similarity=0.305), ValueMatch(current_value='36.57', target_value='46.5', similarity=0.305), ValueMatch(current_value='31.83', target_value='18.2', similarity=0.303), ValueMatch(current_value='46.85', target_value='46.5', similarity=0.303), ValueMatch(current_value='38.97', target_value='28.9', similarity=0.302), ValueMatch(current_value='38.54', target_value='15.4', similarity=0.299), ValueMatch(current_value='20.28', target_value='12.0', similarity=0.298), ValueMatch(current_value='35.42', target_value='15.4', similarity=0.291), ValueMatch(current_value='33.65', target_value='46.5', similarity=0.288), ValueMatch(current_value='28.84', target_value='28.9', similarity=0.282), ValueMatch(current_value='46.41', target_value='46.5', similarity=0.278), ValueMatch(current_value='32.06', target_value='12.0', similarity=0.278), ValueMatch(current_value='30.47', target_value='24.7', similarity=0.271), ValueMatch(current_value='32.83', target_value='43.2', similarity=0.266), ValueMatch(current_value='22.86', target_value='28.9', similarity=0.264), ValueMatch(current_value='39.76', target_value='39.8', similarity=0.257), ValueMatch(current_value='20.55', target_value='12.0', similarity=0.257), ValueMatch(current_value='42.19', target_value='32.1', similarity=0.257), ValueMatch(current_value='39.14', target_value='39.8', similarity=0.252)]\n",
+ "source: BMI target: recist_targeted_regions_sum score: 20.003000000000004\n",
+ "value_matches: [ValueMatch(current_value='36.2', target_value='36.2', similarity=1.0), ValueMatch(current_value='42.19', target_value='21.9', similarity=0.772), ValueMatch(current_value='44.0', target_value='4.4', similarity=0.629), ValueMatch(current_value='41.44', target_value='4.4', similarity=0.585), ValueMatch(current_value='38.44', target_value='4.4', similarity=0.525), ValueMatch(current_value='29.52', target_value='5.2', similarity=0.523), ValueMatch(current_value='44.81', target_value='4.4', similarity=0.493), ValueMatch(current_value='38.97', target_value='9.7', similarity=0.48), ValueMatch(current_value='39.76', target_value='9.7', similarity=0.476), ValueMatch(current_value='59.78', target_value='9.7', similarity=0.458), ValueMatch(current_value='31.0', target_value='11.0', similarity=0.448), ValueMatch(current_value='34.0', target_value='40.0', similarity=0.422), ValueMatch(current_value='17.11', target_value='11.0', similarity=0.409), ValueMatch(current_value='37.11', target_value='11.0', similarity=0.354), ValueMatch(current_value='31.22', target_value='12.8', similarity=0.344), ValueMatch(current_value='28.5', target_value='18.5', similarity=0.343), ValueMatch(current_value='26.22', target_value='36.2', similarity=0.342), ValueMatch(current_value='33.65', target_value='33.1', similarity=0.341), ValueMatch(current_value='51.19', target_value='21.9', similarity=0.336), ValueMatch(current_value='71.09', target_value='11.0', similarity=0.327), ValueMatch(current_value='31.63', target_value='26.3', similarity=0.326), ValueMatch(current_value='55.67', target_value='15.6', similarity=0.323), ValueMatch(current_value='29.62', target_value='36.2', similarity=0.317), ValueMatch(current_value='21.83', target_value='18.5', similarity=0.313), ValueMatch(current_value='34.06', target_value='40.0', similarity=0.312), ValueMatch(current_value='29.4', target_value='29.7', similarity=0.312), ValueMatch(current_value='17.85', target_value='18.5', similarity=0.31), 
ValueMatch(current_value='31.96', target_value='21.9', similarity=0.31), ValueMatch(current_value='31.83', target_value='18.5', similarity=0.31), ValueMatch(current_value='36.0', target_value='36.2', similarity=0.309), ValueMatch(current_value='32.32', target_value='23.4', similarity=0.306), ValueMatch(current_value='26.0', target_value='26.3', similarity=0.306), ValueMatch(current_value='29.37', target_value='29.7', similarity=0.305), ValueMatch(current_value='31.58', target_value='15.6', similarity=0.303), ValueMatch(current_value='21.57', target_value='15.6', similarity=0.298), ValueMatch(current_value='25.68', target_value='15.6', similarity=0.295), ValueMatch(current_value='29.07', target_value='29.7', similarity=0.294), ValueMatch(current_value='40.72', target_value='40.0', similarity=0.293), ValueMatch(current_value='23.88', target_value='23.4', similarity=0.284), ValueMatch(current_value='34.26', target_value='26.3', similarity=0.283), ValueMatch(current_value='28.84', target_value='12.8', similarity=0.28), ValueMatch(current_value='34.84', target_value='4.4', similarity=0.27), ValueMatch(current_value='32.69', target_value='26.3', similarity=0.27), ValueMatch(current_value='21.45', target_value='21.9', similarity=0.266), ValueMatch(current_value='46.85', target_value='18.5', similarity=0.264), ValueMatch(current_value='48.46', target_value='4.4', similarity=0.261), ValueMatch(current_value='38.54', target_value='18.5', similarity=0.261), ValueMatch(current_value='22.86', target_value='12.8', similarity=0.261), ValueMatch(current_value='46.45', target_value='4.4', similarity=0.257), ValueMatch(current_value='20.28', target_value='12.8', similarity=0.255), ValueMatch(current_value='36.84', target_value='36.2', similarity=0.254), ValueMatch(current_value='46.41', target_value='4.4', similarity=0.253), ValueMatch(current_value='30.85', target_value='18.5', similarity=0.25)]\n",
+ "source: BMI target: percent_stromal_cells score: 19.118\n",
+ "value_matches: [ValueMatch(current_value='31.22', target_value='31.2', similarity=0.772), ValueMatch(current_value='17.85', target_value='17.8', similarity=0.77), ValueMatch(current_value='33.65', target_value='36.5', similarity=0.759), ValueMatch(current_value='36.57', target_value='36.5', similarity=0.758), ValueMatch(current_value='34.84', target_value='4.8', similarity=0.541), ValueMatch(current_value='48.46', target_value='4.8', similarity=0.522), ValueMatch(current_value='27.31', target_value='7.3', similarity=0.506), ValueMatch(current_value='44.81', target_value='4.8', similarity=0.492), ValueMatch(current_value='34.89', target_value='4.8', similarity=0.456), ValueMatch(current_value='25.37', target_value='2.5', similarity=0.455), ValueMatch(current_value='25.68', target_value='2.5', similarity=0.446), ValueMatch(current_value='30.48', target_value='4.8', similarity=0.444), ValueMatch(current_value='25.03', target_value='2.5', similarity=0.433), ValueMatch(current_value='31.0', target_value='10.1', similarity=0.429), ValueMatch(current_value='35.0', target_value='15.0', similarity=0.382), ValueMatch(current_value='20.55', target_value='25.5', similarity=0.374), ValueMatch(current_value='17.11', target_value='17.8', similarity=0.371), ValueMatch(current_value='45.53', target_value='25.5', similarity=0.357), ValueMatch(current_value='38.89', target_value='28.9', similarity=0.351), ValueMatch(current_value='55.86', target_value='25.5', similarity=0.347), ValueMatch(current_value='55.67', target_value='25.5', similarity=0.337), ValueMatch(current_value='42.98', target_value='39.8', similarity=0.336), ValueMatch(current_value='39.2', target_value='39.8', similarity=0.321), ValueMatch(current_value='34.37', target_value='33.7', similarity=0.319), ValueMatch(current_value='36.2', target_value='36.5', similarity=0.318), ValueMatch(current_value='28.5', target_value='28.9', similarity=0.314), ValueMatch(current_value='65.71', target_value='36.5', 
similarity=0.314), ValueMatch(current_value='68.39', target_value='39.8', similarity=0.312), ValueMatch(current_value='36.0', target_value='36.5', similarity=0.311), ValueMatch(current_value='71.09', target_value='10.1', similarity=0.31), ValueMatch(current_value='30.47', target_value='20.4', similarity=0.309), ValueMatch(current_value='26.0', target_value='12.6', similarity=0.308), ValueMatch(current_value='38.97', target_value='28.9', similarity=0.304), ValueMatch(current_value='20.28', target_value='20.4', similarity=0.303), ValueMatch(current_value='32.32', target_value='23.1', similarity=0.302), ValueMatch(current_value='17.64', target_value='17.8', similarity=0.301), ValueMatch(current_value='31.58', target_value='15.0', similarity=0.3), ValueMatch(current_value='26.22', target_value='12.6', similarity=0.292), ValueMatch(current_value='21.57', target_value='15.0', similarity=0.291), ValueMatch(current_value='26.14', target_value='12.6', similarity=0.288), ValueMatch(current_value='27.83', target_value='17.8', similarity=0.282), ValueMatch(current_value='28.84', target_value='28.9', similarity=0.282), ValueMatch(current_value='23.88', target_value='23.1', similarity=0.282), ValueMatch(current_value='32.06', target_value='20.4', similarity=0.28), ValueMatch(current_value='27.82', target_value='17.8', similarity=0.269), ValueMatch(current_value='47.82', target_value='17.8', similarity=0.266), ValueMatch(current_value='22.86', target_value='28.9', similarity=0.264), ValueMatch(current_value='39.76', target_value='39.8', similarity=0.259), ValueMatch(current_value='31.83', target_value='31.2', similarity=0.259), ValueMatch(current_value='36.84', target_value='36.5', similarity=0.256), ValueMatch(current_value='37.11', target_value='33.7', similarity=0.255), ValueMatch(current_value='29.37', target_value='33.7', similarity=0.255), ValueMatch(current_value='37.69', target_value='33.7', similarity=0.255), ValueMatch(current_value='59.78', target_value='17.8', 
similarity=0.253), ValueMatch(current_value='39.14', target_value='39.8', similarity=0.253), ValueMatch(current_value='34.26', target_value='12.6', similarity=0.252)]\n",
+ "source: BMI target: longest_dimension score: 20.377\n",
+ "value_matches: [ValueMatch(current_value='20.55', target_value='2.05', similarity=0.8), ValueMatch(current_value='21.57', target_value='2.15', similarity=0.758), ValueMatch(current_value='17.11', target_value='1.7', similarity=0.57), ValueMatch(current_value='26.22', target_value='2.2', similarity=0.568), ValueMatch(current_value='51.19', target_value='1.9', similarity=0.509), ValueMatch(current_value='32.32', target_value='2.3', similarity=0.501), ValueMatch(current_value='20.28', target_value='2.0', similarity=0.497), ValueMatch(current_value='21.83', target_value='1.8', similarity=0.49), ValueMatch(current_value='31.83', target_value='1.8', similarity=0.48), ValueMatch(current_value='22.86', target_value='2.2', similarity=0.478), ValueMatch(current_value='31.22', target_value='2.2', similarity=0.478), ValueMatch(current_value='42.19', target_value='1.9', similarity=0.478), ValueMatch(current_value='31.96', target_value='1.9', similarity=0.471), ValueMatch(current_value='17.85', target_value='1.7', similarity=0.469), ValueMatch(current_value='31.63', target_value='1.6', similarity=0.465), ValueMatch(current_value='23.88', target_value='2.3', similarity=0.463), ValueMatch(current_value='17.64', target_value='1.7', similarity=0.461), ValueMatch(current_value='32.06', target_value='2.0', similarity=0.459), ValueMatch(current_value='21.45', target_value='2.1', similarity=0.439), ValueMatch(current_value='28.5', target_value='1.85', similarity=0.341), ValueMatch(current_value='65.71', target_value='1.65', similarity=0.328), ValueMatch(current_value='29.52', target_value='1.95', similarity=0.318), ValueMatch(current_value='31.58', target_value='2.15', similarity=0.315), ValueMatch(current_value='25.37', target_value='2.25', similarity=0.299), ValueMatch(current_value='25.68', target_value='2.25', similarity=0.295), ValueMatch(current_value='36.57', target_value='1.65', similarity=0.29), ValueMatch(current_value='25.03', target_value='2.25', similarity=0.283), 
ValueMatch(current_value='33.65', target_value='1.65', similarity=0.273), ValueMatch(current_value='46.85', target_value='1.85', similarity=0.264), ValueMatch(current_value='38.54', target_value='1.85', similarity=0.26), ValueMatch(current_value='37.11', target_value='1.7', similarity=0.255)]\n",
+ "source: BMI target: a260_a280_ratio score: 13.354999999999995\n",
+ "value_matches: [ValueMatch(current_value='45.53', target_value='45.5', similarity=0.786), ValueMatch(current_value='29.37', target_value='29.3', similarity=0.773), ValueMatch(current_value='29.4', target_value='9.4', similarity=0.654), ValueMatch(current_value='29.52', target_value='5.2', similarity=0.523), ValueMatch(current_value='36.0', target_value='60.8', similarity=0.379), ValueMatch(current_value='46.0', target_value='60.8', similarity=0.376), ValueMatch(current_value='26.0', target_value='60.8', similarity=0.374), ValueMatch(current_value='55.86', target_value='45.5', similarity=0.361), ValueMatch(current_value='55.67', target_value='45.5', similarity=0.353), ValueMatch(current_value='27.1', target_value='27.7', similarity=0.35), ValueMatch(current_value='20.55', target_value='45.5', similarity=0.35), ValueMatch(current_value='41.44', target_value='34.1', similarity=0.348), ValueMatch(current_value='51.19', target_value='51.2', similarity=0.347), ValueMatch(current_value='38.41', target_value='34.1', similarity=0.343), ValueMatch(current_value='46.45', target_value='45.5', similarity=0.341), ValueMatch(current_value='28.5', target_value='18.5', similarity=0.339), ValueMatch(current_value='46.41', target_value='34.1', similarity=0.334), ValueMatch(current_value='30.85', target_value='60.8', similarity=0.33), ValueMatch(current_value='31.22', target_value='12.8', similarity=0.328), ValueMatch(current_value='39.2', target_value='43.9', similarity=0.328), ValueMatch(current_value='43.0', target_value='43.9', similarity=0.327), ValueMatch(current_value='27.0', target_value='27.7', similarity=0.323), ValueMatch(current_value='21.83', target_value='18.5', similarity=0.311), ValueMatch(current_value='31.83', target_value='18.5', similarity=0.309), ValueMatch(current_value='17.85', target_value='18.5', similarity=0.308), ValueMatch(current_value='32.32', target_value='23.4', similarity=0.308), ValueMatch(current_value='45.83', target_value='45.5', 
similarity=0.306), ValueMatch(current_value='34.53', target_value='45.5', similarity=0.305), ValueMatch(current_value='21.45', target_value='45.5', similarity=0.303), ValueMatch(current_value='27.82', target_value='27.7', similarity=0.301), ValueMatch(current_value='22.86', target_value='38.6', similarity=0.299), ValueMatch(current_value='39.14', target_value='43.9', similarity=0.298), ValueMatch(current_value='31.63', target_value='16.7', similarity=0.297), ValueMatch(current_value='34.37', target_value='43.9', similarity=0.291), ValueMatch(current_value='23.88', target_value='23.4', similarity=0.288), ValueMatch(current_value='28.84', target_value='12.8', similarity=0.282), ValueMatch(current_value='27.83', target_value='27.7', similarity=0.281), ValueMatch(current_value='29.62', target_value='29.3', similarity=0.28), ValueMatch(current_value='34.0', target_value='34.1', similarity=0.274), ValueMatch(current_value='68.39', target_value='43.9', similarity=0.273), ValueMatch(current_value='39.76', target_value='43.9', similarity=0.265), ValueMatch(current_value='27.31', target_value='27.7', similarity=0.264), ValueMatch(current_value='38.88', target_value='38.6', similarity=0.262), ValueMatch(current_value='38.89', target_value='38.6', similarity=0.262), ValueMatch(current_value='46.85', target_value='18.5', similarity=0.261), ValueMatch(current_value='38.54', target_value='18.5', similarity=0.259), ValueMatch(current_value='20.28', target_value='12.8', similarity=0.259), ValueMatch(current_value='34.84', target_value='34.1', similarity=0.256), ValueMatch(current_value='27.66', target_value='27.7', similarity=0.254)]\n",
+ "source: BMI target: spindle_cell_percent score: 16.623\n",
+ "value_matches: [ValueMatch(current_value='29.52', target_value='95.2', similarity=0.817), ValueMatch(current_value='55.86', target_value='55.8', similarity=0.777), ValueMatch(current_value='34.84', target_value='4.8', similarity=0.535), ValueMatch(current_value='48.46', target_value='4.8', similarity=0.517), ValueMatch(current_value='44.81', target_value='4.8', similarity=0.487), ValueMatch(current_value='34.72', target_value='7.2', similarity=0.486), ValueMatch(current_value='40.72', target_value='7.2', similarity=0.464), ValueMatch(current_value='34.89', target_value='4.8', similarity=0.454), ValueMatch(current_value='30.48', target_value='4.8', similarity=0.434), ValueMatch(current_value='27.0', target_value='87.0', similarity=0.407), ValueMatch(current_value='29.4', target_value='69.4', similarity=0.405), ValueMatch(current_value='45.53', target_value='55.8', similarity=0.358), ValueMatch(current_value='31.22', target_value='12.5', similarity=0.343), ValueMatch(current_value='20.55', target_value='55.8', similarity=0.34), ValueMatch(current_value='55.67', target_value='55.8', similarity=0.338), ValueMatch(current_value='33.65', target_value='33.7', similarity=0.337), ValueMatch(current_value='35.42', target_value='25.4', similarity=0.331), ValueMatch(current_value='45.83', target_value='55.8', similarity=0.33), ValueMatch(current_value='38.89', target_value='18.9', similarity=0.329), ValueMatch(current_value='42.98', target_value='98.5', similarity=0.328), ValueMatch(current_value='27.82', target_value='82.1', similarity=0.327), ValueMatch(current_value='28.5', target_value='98.5', similarity=0.325), ValueMatch(current_value='34.37', target_value='33.7', similarity=0.323), ValueMatch(current_value='31.58', target_value='55.8', similarity=0.321), ValueMatch(current_value='31.63', target_value='76.3', similarity=0.319), ValueMatch(current_value='21.83', target_value='82.1', similarity=0.314), ValueMatch(current_value='38.54', target_value='25.4', 
similarity=0.307), ValueMatch(current_value='37.69', target_value='69.4', similarity=0.303), ValueMatch(current_value='32.69', target_value='69.4', similarity=0.303), ValueMatch(current_value='27.66', target_value='76.3', similarity=0.302), ValueMatch(current_value='31.83', target_value='18.9', similarity=0.301), ValueMatch(current_value='39.76', target_value='76.3', similarity=0.294), ValueMatch(current_value='31.96', target_value='49.6', similarity=0.293), ValueMatch(current_value='47.82', target_value='82.1', similarity=0.287), ValueMatch(current_value='29.62', target_value='49.6', similarity=0.285), ValueMatch(current_value='38.97', target_value='18.9', similarity=0.284), ValueMatch(current_value='17.64', target_value='76.3', similarity=0.264), ValueMatch(current_value='25.37', target_value='33.7', similarity=0.264), ValueMatch(current_value='21.45', target_value='82.1', similarity=0.263), ValueMatch(current_value='25.68', target_value='25.4', similarity=0.258), ValueMatch(current_value='42.19', target_value='82.1', similarity=0.258), ValueMatch(current_value='29.37', target_value='33.7', similarity=0.257), ValueMatch(current_value='37.11', target_value='33.7', similarity=0.255), ValueMatch(current_value='21.57', target_value='82.1', similarity=0.253), ValueMatch(current_value='46.85', target_value='98.5', similarity=0.252)]\n",
+ "source: BMI target: percent_normal_cells score: 16.029\n",
+ "value_matches: [ValueMatch(current_value='27.1', target_value='7.1', similarity=0.602), ValueMatch(current_value='17.11', target_value='7.1', similarity=0.569), ValueMatch(current_value='20.55', target_value='0.5', similarity=0.566), ValueMatch(current_value='37.11', target_value='7.1', similarity=0.535), ValueMatch(current_value='42.98', target_value='9.8', similarity=0.518), ValueMatch(current_value='32.32', target_value='2.3', similarity=0.488), ValueMatch(current_value='34.72', target_value='4.7', similarity=0.475), ValueMatch(current_value='47.82', target_value='4.7', similarity=0.471), ValueMatch(current_value='30.47', target_value='4.7', similarity=0.465), ValueMatch(current_value='33.65', target_value='33.3', similarity=0.462), ValueMatch(current_value='65.71', target_value='7.1', similarity=0.453), ValueMatch(current_value='23.88', target_value='2.3', similarity=0.447), ValueMatch(current_value='71.09', target_value='7.1', similarity=0.429), ValueMatch(current_value='31.0', target_value='21.0', similarity=0.416), ValueMatch(current_value='29.4', target_value='29.9', similarity=0.346), ValueMatch(current_value='31.22', target_value='12.5', similarity=0.335), ValueMatch(current_value='26.14', target_value='36.1', similarity=0.331), ValueMatch(current_value='29.52', target_value='39.5', similarity=0.329), ValueMatch(current_value='21.57', target_value='15.2', similarity=0.328), ValueMatch(current_value='39.2', target_value='39.5', similarity=0.317), ValueMatch(current_value='21.83', target_value='18.6', similarity=0.305), ValueMatch(current_value='55.67', target_value='26.7', similarity=0.305), ValueMatch(current_value='31.83', target_value='18.6', similarity=0.302), ValueMatch(current_value='26.0', target_value='26.7', similarity=0.301), ValueMatch(current_value='29.62', target_value='29.9', similarity=0.3), ValueMatch(current_value='31.58', target_value='15.2', similarity=0.299), ValueMatch(current_value='36.2', target_value='36.1', similarity=0.293), 
ValueMatch(current_value='34.0', target_value='23.4', similarity=0.289), ValueMatch(current_value='36.0', target_value='36.1', similarity=0.286), ValueMatch(current_value='26.22', target_value='26.7', similarity=0.285), ValueMatch(current_value='22.86', target_value='18.6', similarity=0.282), ValueMatch(current_value='36.81', target_value='36.1', similarity=0.281), ValueMatch(current_value='29.37', target_value='29.9', similarity=0.276), ValueMatch(current_value='55.86', target_value='18.6', similarity=0.273), ValueMatch(current_value='34.84', target_value='23.4', similarity=0.27), ValueMatch(current_value='25.37', target_value='12.5', similarity=0.266), ValueMatch(current_value='29.07', target_value='29.9', similarity=0.265), ValueMatch(current_value='68.39', target_value='39.5', similarity=0.263), ValueMatch(current_value='25.68', target_value='12.5', similarity=0.262), ValueMatch(current_value='34.26', target_value='23.4', similarity=0.261), ValueMatch(current_value='39.76', target_value='39.5', similarity=0.256), ValueMatch(current_value='21.45', target_value='21.0', similarity=0.255), ValueMatch(current_value='25.03', target_value='12.5', similarity=0.252), ValueMatch(current_value='39.14', target_value='39.5', similarity=0.25), ValueMatch(current_value='42.19', target_value='21.0', similarity=0.25)]\n",
+ "source: BMI target: necrosis_percent score: 15.809000000000001\n",
+ "value_matches: [ValueMatch(current_value='17.11', target_value='1.1', similarity=0.621), ValueMatch(current_value='26.22', target_value='2.2', similarity=0.573), ValueMatch(current_value='41.44', target_value='1.4', similarity=0.547), ValueMatch(current_value='31.22', target_value='1.2', similarity=0.542), ValueMatch(current_value='37.11', target_value='1.1', similarity=0.535), ValueMatch(current_value='24.32', target_value='2.4', similarity=0.531), ValueMatch(current_value='51.19', target_value='1.9', similarity=0.519), ValueMatch(current_value='20.28', target_value='2.0', similarity=0.506), ValueMatch(current_value='21.83', target_value='1.8', similarity=0.501), ValueMatch(current_value='32.32', target_value='2.3', similarity=0.5), ValueMatch(current_value='31.83', target_value='1.8', similarity=0.494), ValueMatch(current_value='31.58', target_value='1.5', similarity=0.492), ValueMatch(current_value='42.19', target_value='1.9', similarity=0.488), ValueMatch(current_value='22.86', target_value='2.2', similarity=0.487), ValueMatch(current_value='21.57', target_value='1.5', similarity=0.484), ValueMatch(current_value='31.96', target_value='1.9', similarity=0.482), ValueMatch(current_value='31.63', target_value='1.6', similarity=0.482), ValueMatch(current_value='17.85', target_value='1.7', similarity=0.476), ValueMatch(current_value='17.64', target_value='1.7', similarity=0.473), ValueMatch(current_value='32.06', target_value='2.0', similarity=0.471), ValueMatch(current_value='25.37', target_value='2.5', similarity=0.465), ValueMatch(current_value='23.88', target_value='2.3', similarity=0.461), ValueMatch(current_value='25.68', target_value='2.5', similarity=0.458), ValueMatch(current_value='21.45', target_value='1.4', similarity=0.457), ValueMatch(current_value='25.03', target_value='2.5', similarity=0.44), ValueMatch(current_value='26.14', target_value='1.4', similarity=0.44), ValueMatch(current_value='20.55', target_value='2.0', similarity=0.437), 
ValueMatch(current_value='39.14', target_value='1.4', similarity=0.434)]\n",
+ "source: BMI target: ribosomal_rna_28s_16s_ratio score: 13.795999999999998\n",
+ "value_matches: [ValueMatch(current_value='28.5', target_value='32850', similarity=0.653), ValueMatch(current_value='36.57', target_value='6570', similarity=0.602), ValueMatch(current_value='65.71', target_value='6570', similarity=0.571), ValueMatch(current_value='42.19', target_value='21915', similarity=0.516), ValueMatch(current_value='32.83', target_value='32850', similarity=0.503), ValueMatch(current_value='25.03', target_value='18250', similarity=0.462), ValueMatch(current_value='33.65', target_value='36500', similarity=0.442), ValueMatch(current_value='20.55', target_value='25550', similarity=0.393), ValueMatch(current_value='26.14', target_value='14610', similarity=0.378), ValueMatch(current_value='29.4', target_value='9490', similarity=0.359), ValueMatch(current_value='21.57', target_value='21915', similarity=0.355), ValueMatch(current_value='45.53', target_value='25550', similarity=0.349), ValueMatch(current_value='27.0', target_value='6570', similarity=0.341), ValueMatch(current_value='55.86', target_value='25550', similarity=0.338), ValueMatch(current_value='25.37', target_value='23725', similarity=0.336), ValueMatch(current_value='55.67', target_value='25550', similarity=0.329), ValueMatch(current_value='35.0', target_value='8035', similarity=0.319), ValueMatch(current_value='39.2', target_value='29220', similarity=0.305), ValueMatch(current_value='31.0', target_value='14610', similarity=0.303), ValueMatch(current_value='51.19', target_value='21915', similarity=0.299), ValueMatch(current_value='26.22', target_value='29220', similarity=0.295), ValueMatch(current_value='36.0', target_value='36500', similarity=0.284), ValueMatch(current_value='29.07', target_value='9490', similarity=0.281), ValueMatch(current_value='27.1', target_value='12775', similarity=0.276), ValueMatch(current_value='32.32', target_value='32850', similarity=0.273), ValueMatch(current_value='20.28', target_value='29220', similarity=0.264), ValueMatch(current_value='39.14', 
target_value='21915', similarity=0.255), ValueMatch(current_value='29.62', target_value='29220', similarity=0.252)]\n",
+ "source: BMI target: age_at_diagnosis score: 10.333000000000002\n",
+ "value_matches: [ValueMatch(current_value='25.03', target_value='25.0', similarity=0.737), ValueMatch(current_value='31.0', target_value='1.0', similarity=0.626), ValueMatch(current_value='35.0', target_value='5.0', similarity=0.577), ValueMatch(current_value='43.0', target_value='3.0', similarity=0.569), ValueMatch(current_value='20.55', target_value='0.5', similarity=0.544), ValueMatch(current_value='20.28', target_value='2.0', similarity=0.496), ValueMatch(current_value='31.58', target_value='1.5', similarity=0.476), ValueMatch(current_value='21.57', target_value='1.5', similarity=0.466), ValueMatch(current_value='71.09', target_value='1.0', similarity=0.452), ValueMatch(current_value='30.48', target_value='3.0', similarity=0.448), ValueMatch(current_value='32.06', target_value='2.0', similarity=0.446), ValueMatch(current_value='30.47', target_value='3.0', similarity=0.44), ValueMatch(current_value='25.37', target_value='2.5', similarity=0.439), ValueMatch(current_value='30.85', target_value='3.0', similarity=0.433), ValueMatch(current_value='25.68', target_value='2.5', similarity=0.432), ValueMatch(current_value='40.72', target_value='0.75', similarity=0.283), ValueMatch(current_value='29.07', target_value='0.75', similarity=0.278)]\n",
+ "source: BMI target: concentration score: 8.142000000000001\n",
+ "value_matches: [ValueMatch(current_value='41.44', target_value='14.4', similarity=0.809), ValueMatch(current_value='35.0', target_value='5.0', similarity=0.615), ValueMatch(current_value='27.1', target_value='7.1', similarity=0.601), ValueMatch(current_value='28.5', target_value='8.5', similarity=0.587), ValueMatch(current_value='17.11', target_value='7.1', similarity=0.571), ValueMatch(current_value='20.55', target_value='0.5', similarity=0.568), ValueMatch(current_value='37.11', target_value='7.1', similarity=0.537), ValueMatch(current_value='24.32', target_value='2.4', similarity=0.534), ValueMatch(current_value='31.63', target_value='6.3', similarity=0.524), ValueMatch(current_value='31.22', target_value='1.2', similarity=0.518), ValueMatch(current_value='23.88', target_value='3.8', similarity=0.494), ValueMatch(current_value='38.88', target_value='3.8', similarity=0.492), ValueMatch(current_value='38.89', target_value='3.8', similarity=0.492), ValueMatch(current_value='25.03', target_value='5.0', similarity=0.48), ValueMatch(current_value='65.71', target_value='7.1', similarity=0.453), ValueMatch(current_value='46.85', target_value='8.5', similarity=0.45), ValueMatch(current_value='38.54', target_value='8.5', similarity=0.446), ValueMatch(current_value='17.85', target_value='8.5', similarity=0.439), ValueMatch(current_value='30.85', target_value='8.5', similarity=0.426), ValueMatch(current_value='38.41', target_value='3.8', similarity=0.425), ValueMatch(current_value='71.09', target_value='7.1', similarity=0.425), ValueMatch(current_value='38.44', target_value='3.8', similarity=0.417), ValueMatch(current_value='44.0', target_value='14.4', similarity=0.411), ValueMatch(current_value='38.97', target_value='3.8', similarity=0.403), ValueMatch(current_value='44.81', target_value='14.4', similarity=0.374), ValueMatch(current_value='36.0', target_value='16.0', similarity=0.372), ValueMatch(current_value='46.0', target_value='16.0', similarity=0.369), 
ValueMatch(current_value='26.0', target_value='16.0', similarity=0.368), ValueMatch(current_value='40.72', target_value='20.7', similarity=0.344), ValueMatch(current_value='29.07', target_value='20.7', similarity=0.339), ValueMatch(current_value='51.19', target_value='11.2', similarity=0.333), ValueMatch(current_value='21.83', target_value='18.5', similarity=0.31), ValueMatch(current_value='31.83', target_value='18.5', similarity=0.308), ValueMatch(current_value='21.45', target_value='14.4', similarity=0.307), ValueMatch(current_value='20.28', target_value='20.7', similarity=0.301), ValueMatch(current_value='26.14', target_value='14.4', similarity=0.297), ValueMatch(current_value='39.14', target_value='14.4', similarity=0.293), ValueMatch(current_value='28.84', target_value='12.8', similarity=0.287), ValueMatch(current_value='32.06', target_value='20.7', similarity=0.276), ValueMatch(current_value='22.86', target_value='12.8', similarity=0.27), ValueMatch(current_value='55.86', target_value='8.5', similarity=0.261)]\n",
+ "source: BMI target: tumor_burden score: 17.525999999999996\n",
+ "value_matches: [ValueMatch(current_value='36.0', target_value='6.0', similarity=0.631), ValueMatch(current_value='46.0', target_value='6.0', similarity=0.627), ValueMatch(current_value='26.0', target_value='6.0', similarity=0.623), ValueMatch(current_value='28.5', target_value='8.5', similarity=0.592), ValueMatch(current_value='43.0', target_value='4.3', similarity=0.571), ValueMatch(current_value='31.22', target_value='1.2', similarity=0.546), ValueMatch(current_value='34.84', target_value='4.8', similarity=0.537), ValueMatch(current_value='48.46', target_value='4.8', similarity=0.519), ValueMatch(current_value='33.65', target_value='3.3', similarity=0.517), ValueMatch(current_value='71.09', target_value='0.9', similarity=0.515), ValueMatch(current_value='34.37', target_value='4.3', similarity=0.508), ValueMatch(current_value='21.83', target_value='1.8', similarity=0.505), ValueMatch(current_value='31.83', target_value='1.8', similarity=0.499), ValueMatch(current_value='38.54', target_value='5.4', similarity=0.497), ValueMatch(current_value='38.97', target_value='9.7', similarity=0.492), ValueMatch(current_value='44.81', target_value='4.8', similarity=0.489), ValueMatch(current_value='39.76', target_value='9.7', similarity=0.488), ValueMatch(current_value='34.72', target_value='7.2', similarity=0.487), ValueMatch(current_value='35.42', target_value='5.4', similarity=0.476), ValueMatch(current_value='59.78', target_value='9.7', similarity=0.471), ValueMatch(current_value='25.37', target_value='2.5', similarity=0.468), ValueMatch(current_value='40.72', target_value='7.2', similarity=0.466), ValueMatch(current_value='46.85', target_value='8.5', similarity=0.46), ValueMatch(current_value='25.68', target_value='2.5', similarity=0.459), ValueMatch(current_value='21.45', target_value='2.1', similarity=0.456), ValueMatch(current_value='34.89', target_value='4.8', similarity=0.451), ValueMatch(current_value='17.85', target_value='8.5', similarity=0.448), 
ValueMatch(current_value='42.19', target_value='2.1', similarity=0.447), ValueMatch(current_value='30.48', target_value='4.8', similarity=0.443), ValueMatch(current_value='25.03', target_value='2.5', similarity=0.441), ValueMatch(current_value='21.57', target_value='2.1', similarity=0.439), ValueMatch(current_value='30.85', target_value='8.5', similarity=0.435), ValueMatch(current_value='24.32', target_value='4.3', similarity=0.427), ValueMatch(current_value='29.37', target_value='3.7', similarity=0.426), ValueMatch(current_value='37.69', target_value='3.7', similarity=0.426), ValueMatch(current_value='37.11', target_value='3.7', similarity=0.424), ValueMatch(current_value='31.0', target_value='10.4', similarity=0.405), ValueMatch(current_value='30.47', target_value='10.4', similarity=0.3), ValueMatch(current_value='55.86', target_value='8.5', similarity=0.261), ValueMatch(current_value='17.11', target_value='2.1', similarity=0.257)]\n",
+ "source: BMI target: tumor_width_measurement score: 18.929\n",
+ "value_matches: [ValueMatch(current_value='27.83', target_value='27830973', similarity=0.595), ValueMatch(current_value='29.52', target_value='29567829', similarity=0.468), ValueMatch(current_value='46.41', target_value='26786412', similarity=0.408), ValueMatch(current_value='39.76', target_value='33289765', similarity=0.405), ValueMatch(current_value='38.97', target_value='33289765', similarity=0.401), ValueMatch(current_value='46.85', target_value='25468954', similarity=0.393), ValueMatch(current_value='27.82', target_value='29567829', similarity=0.389), ValueMatch(current_value='47.82', target_value='28954789', similarity=0.385), ValueMatch(current_value='32.83', target_value='33289765', similarity=0.384), ValueMatch(current_value='55.67', target_value='35678901', similarity=0.381), ValueMatch(current_value='34.53', target_value='39012345', similarity=0.372), ValueMatch(current_value='32.06', target_value='32098765', similarity=0.365), ValueMatch(current_value='42.98', target_value='31298756', similarity=0.36), ValueMatch(current_value='31.22', target_value='31298756', similarity=0.336), ValueMatch(current_value='33.65', target_value='33289765', similarity=0.312), ValueMatch(current_value='30.48', target_value='30541042', similarity=0.284), ValueMatch(current_value='30.47', target_value='30541042', similarity=0.281), ValueMatch(current_value='27.31', target_value='27830973', similarity=0.281), ValueMatch(current_value='29.4', target_value='29567829', similarity=0.271), ValueMatch(current_value='38.54', target_value='25468954', similarity=0.27), ValueMatch(current_value='25.68', target_value='25468954', similarity=0.266), ValueMatch(current_value='23.88', target_value='38901234', similarity=0.265), ValueMatch(current_value='29.62', target_value='29567829', similarity=0.261), ValueMatch(current_value='35.42', target_value='25468954', similarity=0.26), ValueMatch(current_value='38.89', target_value='28954789', similarity=0.253)]\n",
+ "source: BMI target: pmid score: 8.645999999999999\n",
+ "value_matches: [ValueMatch(current_value='38.89', target_value='88.9', similarity=0.831), ValueMatch(current_value='25.68', target_value='25.6', similarity=0.779), ValueMatch(current_value='40.72', target_value='40.7', similarity=0.763), ValueMatch(current_value='20.55', target_value='205.1', similarity=0.608), ValueMatch(current_value='38.88', target_value='88.9', similarity=0.511), ValueMatch(current_value='44.0', target_value='40.7', similarity=0.384), ValueMatch(current_value='45.53', target_value='55.4', similarity=0.378), ValueMatch(current_value='34.0', target_value='40.7', similarity=0.376), ValueMatch(current_value='36.0', target_value='60.8', similarity=0.366), ValueMatch(current_value='46.0', target_value='60.8', similarity=0.363), ValueMatch(current_value='26.0', target_value='60.8', similarity=0.361), ValueMatch(current_value='43.0', target_value='30.0', similarity=0.359), ValueMatch(current_value='29.07', target_value='90.0', similarity=0.351), ValueMatch(current_value='31.0', target_value='110.4', similarity=0.351), ValueMatch(current_value='34.89', target_value='88.9', similarity=0.349), ValueMatch(current_value='23.88', target_value='88.9', similarity=0.345), ValueMatch(current_value='28.84', target_value='88.9', similarity=0.341), ValueMatch(current_value='29.52', target_value='45.2', similarity=0.336), ValueMatch(current_value='17.11', target_value='110.4', similarity=0.336), ValueMatch(current_value='38.97', target_value='88.9', similarity=0.335), ValueMatch(current_value='55.67', target_value='25.6', similarity=0.333), ValueMatch(current_value='38.54', target_value='55.4', similarity=0.33), ValueMatch(current_value='55.86', target_value='55.4', similarity=0.325), ValueMatch(current_value='30.85', target_value='60.8', similarity=0.323), ValueMatch(current_value='35.42', target_value='55.4', similarity=0.318), ValueMatch(current_value='25.37', target_value='75.3', similarity=0.313), ValueMatch(current_value='35.0', target_value='150.5', 
similarity=0.311), ValueMatch(current_value='34.53', target_value='75.3', similarity=0.292), ValueMatch(current_value='37.11', target_value='110.4', similarity=0.289), ValueMatch(current_value='46.45', target_value='45.2', similarity=0.287), ValueMatch(current_value='21.45', target_value='45.2', similarity=0.285), ValueMatch(current_value='30.48', target_value='30.0', similarity=0.285), ValueMatch(current_value='36.57', target_value='95.7', similarity=0.284), ValueMatch(current_value='30.47', target_value='30.0', similarity=0.28), ValueMatch(current_value='34.06', target_value='40.7', similarity=0.277), ValueMatch(current_value='51.19', target_value='205.1', similarity=0.277), ValueMatch(current_value='65.71', target_value='95.7', similarity=0.275), ValueMatch(current_value='21.57', target_value='95.7', similarity=0.27), ValueMatch(current_value='25.03', target_value='25.6', similarity=0.267), ValueMatch(current_value='31.58', target_value='150.5', similarity=0.263), ValueMatch(current_value='20.28', target_value='130.2', similarity=0.263), ValueMatch(current_value='17.85', target_value='175.0', similarity=0.26), ValueMatch(current_value='71.09', target_value='110.4', similarity=0.257)]\n",
+ "source: BMI target: gross_tumor_weight score: 15.486999999999998\n",
+ "value_matches: [ValueMatch(current_value='20.28', target_value='0.028', similarity=0.65), ValueMatch(current_value='35.0', target_value='0.035', similarity=0.384), ValueMatch(current_value='20.55', target_value='0.005', similarity=0.321), ValueMatch(current_value='25.03', target_value='0.03', similarity=0.316), ValueMatch(current_value='40.72', target_value='0.007', similarity=0.282), ValueMatch(current_value='29.07', target_value='0.007', similarity=0.278), ValueMatch(current_value='31.22', target_value='0.012', similarity=0.276), ValueMatch(current_value='33.65', target_value='0.033', similarity=0.27), ValueMatch(current_value='28.5', target_value='0.028', similarity=0.266), ValueMatch(current_value='21.83', target_value='0.018', similarity=0.25)]\n",
+ "source: BMI target: proportion_base_mismatch score: 3.2929999999999997\n",
+ "value_matches: [ValueMatch(current_value='23.88', target_value='23.8', similarity=0.802), ValueMatch(current_value='34.53', target_value='45.3', similarity=0.798), ValueMatch(current_value='35.0', target_value='3.5', similarity=0.575), ValueMatch(current_value='45.53', target_value='45.3', similarity=0.531), ValueMatch(current_value='55.67', target_value='5.6', similarity=0.516), ValueMatch(current_value='39.14', target_value='9.1', similarity=0.515), ValueMatch(current_value='34.72', target_value='7.2', similarity=0.491), ValueMatch(current_value='25.68', target_value='5.6', similarity=0.475), ValueMatch(current_value='35.42', target_value='3.5', similarity=0.47), ValueMatch(current_value='40.72', target_value='7.2', similarity=0.463), ValueMatch(current_value='27.0', target_value='70.4', similarity=0.423), ValueMatch(current_value='27.1', target_value='47.1', similarity=0.354), ValueMatch(current_value='36.2', target_value='62.8', similarity=0.354), ValueMatch(current_value='30.47', target_value='70.4', similarity=0.352), ValueMatch(current_value='31.22', target_value='12.5', similarity=0.338), ValueMatch(current_value='38.89', target_value='18.9', similarity=0.336), ValueMatch(current_value='17.11', target_value='47.1', similarity=0.336), ValueMatch(current_value='26.22', target_value='62.8', similarity=0.318), ValueMatch(current_value='37.11', target_value='47.1', similarity=0.316), ValueMatch(current_value='28.5', target_value='62.8', similarity=0.314), ValueMatch(current_value='22.86', target_value='62.8', similarity=0.314), ValueMatch(current_value='32.32', target_value='23.8', similarity=0.314), ValueMatch(current_value='46.45', target_value='45.3', similarity=0.308), ValueMatch(current_value='30.48', target_value='70.4', similarity=0.304), ValueMatch(current_value='34.89', target_value='18.9', similarity=0.303), ValueMatch(current_value='31.58', target_value='15.4', similarity=0.302), ValueMatch(current_value='21.83', target_value='18.9', 
similarity=0.302), ValueMatch(current_value='45.83', target_value='45.3', similarity=0.301), ValueMatch(current_value='31.83', target_value='18.9', similarity=0.301), ValueMatch(current_value='21.57', target_value='15.4', similarity=0.296), ValueMatch(current_value='38.54', target_value='15.4', similarity=0.295), ValueMatch(current_value='29.62', target_value='62.8', similarity=0.293), ValueMatch(current_value='38.97', target_value='18.9', similarity=0.29), ValueMatch(current_value='25.37', target_value='45.3', similarity=0.286), ValueMatch(current_value='47.82', target_value='47.1', similarity=0.283), ValueMatch(current_value='28.84', target_value='62.8', similarity=0.282), ValueMatch(current_value='38.88', target_value='23.8', similarity=0.273), ValueMatch(current_value='65.71', target_value='47.1', similarity=0.267), ValueMatch(current_value='21.45', target_value='45.3', similarity=0.266), ValueMatch(current_value='34.0', target_value='34.9', similarity=0.263), ValueMatch(current_value='20.28', target_value='62.8', similarity=0.258), ValueMatch(current_value='25.03', target_value='12.5', similarity=0.25), ValueMatch(current_value='71.09', target_value='47.1', similarity=0.25)]\n",
+ "source: BMI target: percent_inflam_infiltration score: 15.677999999999997\n",
+ "Top k reranked columns: [('BMI', 'average_base_quality', 23.353), ('BMI', 'longest_dimension', 20.377), ('BMI', 'recist_targeted_regions_sum', 20.003000000000004), ('BMI', 'intermediate_dimension', 19.926), ('BMI', 'body_surface_area', 19.580999999999996), ('BMI', 'percent_stromal_cells', 19.118), ('BMI', 'bmi', 19.058), ('BMI', 'tumor_width_measurement', 18.929), ('BMI', 'tumor_burden', 17.525999999999996), ('BMI', 'spindle_cell_percent', 16.623), ('BMI', 'percent_normal_cells', 16.029), ('BMI', 'necrosis_percent', 15.809000000000001), ('BMI', 'percent_inflam_infiltration', 15.677999999999997), ('BMI', 'gross_tumor_weight', 15.486999999999998), ('BMI', 'ribosomal_rna_28s_16s_ratio', 13.795999999999998), ('BMI', 'a260_a280_ratio', 13.354999999999995), ('BMI', 'age_at_diagnosis', 10.333000000000002), ('BMI', 'pmid', 8.645999999999999), ('BMI', 'concentration', 8.142000000000001), ('BMI', 'proportion_base_mismatch', 3.2929999999999997)]\n",
+ "\n",
+ "Source column: Age\n",
+ "value_matches: [ValueMatch(current_value='55.0', target_value='45.5', similarity=0.488), ValueMatch(current_value='45.0', target_value='45.5', similarity=0.467), ValueMatch(current_value='58.0', target_value='58.6', similarity=0.425), ValueMatch(current_value='78.0', target_value='67.8', similarity=0.425), ValueMatch(current_value='86.0', target_value='58.6', similarity=0.42), ValueMatch(current_value='63.0', target_value='63.7', similarity=0.42), ValueMatch(current_value='76.0', target_value='76.8', similarity=0.42), ValueMatch(current_value='73.0', target_value='73.2', similarity=0.415), ValueMatch(current_value='67.0', target_value='67.8', similarity=0.409), ValueMatch(current_value='90.0', target_value='89.0', similarity=0.394), ValueMatch(current_value='68.0', target_value='76.8', similarity=0.394), ValueMatch(current_value='69.0', target_value='89.0', similarity=0.349), ValueMatch(current_value='59.0', target_value='89.0', similarity=0.347), ValueMatch(current_value='61.0', target_value='110.5', similarity=0.341), ValueMatch(current_value='71.0', target_value='110.5', similarity=0.34), ValueMatch(current_value='41.0', target_value='110.5', similarity=0.336), ValueMatch(current_value='77.0', target_value='70.1', similarity=0.325), ValueMatch(current_value='70.0', target_value='70.1', similarity=0.319), ValueMatch(current_value='57.0', target_value='70.1', similarity=0.273)]\n",
+ "source: Age target: weight score: 7.3069999999999995\n",
+ "value_matches: [ValueMatch(current_value='55.0', target_value='25550', similarity=0.79), ValueMatch(current_value='57.0', target_value='6570', similarity=0.764), ValueMatch(current_value='73.0', target_value='7305', similarity=0.755), ValueMatch(current_value='61.0', target_value='14610', similarity=0.679), ValueMatch(current_value='65.0', target_value='36500', similarity=0.632), ValueMatch(current_value='85.0', target_value='32850', similarity=0.597), ValueMatch(current_value='50.0', target_value='36500', similarity=0.584), ValueMatch(current_value='90.0', target_value='9490', similarity=0.378), ValueMatch(current_value='41.0', target_value='14610', similarity=0.347), ValueMatch(current_value='77.0', target_value='12775', similarity=0.343), ValueMatch(current_value='72.0', target_value='23725', similarity=0.339), ValueMatch(current_value='59.0', target_value='9490', similarity=0.335), ValueMatch(current_value='69.0', target_value='9490', similarity=0.333), ValueMatch(current_value='75.0', target_value='12775', similarity=0.319), ValueMatch(current_value='42.0', target_value='16425', similarity=0.302), ValueMatch(current_value='38.0', target_value='8035', similarity=0.301), ValueMatch(current_value='67.0', target_value='6570', similarity=0.293), ValueMatch(current_value='53.0', target_value='7305', similarity=0.292), ValueMatch(current_value='46.0', target_value='14610', similarity=0.291), ValueMatch(current_value='62.0', target_value='29220', similarity=0.289), ValueMatch(current_value='71.0', target_value='14610', similarity=0.287), ValueMatch(current_value='64.0', target_value='16425', similarity=0.28), ValueMatch(current_value='70.0', target_value='6570', similarity=0.275), ValueMatch(current_value='58.0', target_value='8035', similarity=0.266), ValueMatch(current_value='60.0', target_value='36500', similarity=0.251)]\n",
+ "source: Age target: age_at_diagnosis score: 10.322\n",
+ "value_matches: [ValueMatch(current_value='55.0', target_value='55', similarity=0.726), ValueMatch(current_value='48.0', target_value='48', similarity=0.693), ValueMatch(current_value='42.0', target_value='42', similarity=0.689), ValueMatch(current_value='45.0', target_value='45', similarity=0.687), ValueMatch(current_value='58.0', target_value='58', similarity=0.679), ValueMatch(current_value='62.0', target_value='62', similarity=0.678), ValueMatch(current_value='53.0', target_value='53', similarity=0.676), ValueMatch(current_value='65.0', target_value='65', similarity=0.675), ValueMatch(current_value='77.0', target_value='70', similarity=0.653), ValueMatch(current_value='70.0', target_value='70', similarity=0.633), ValueMatch(current_value='50.0', target_value='50', similarity=0.603), ValueMatch(current_value='57.0', target_value='70', similarity=0.546), ValueMatch(current_value='67.0', target_value='70', similarity=0.544), ValueMatch(current_value='75.0', target_value='50', similarity=0.503), ValueMatch(current_value='85.0', target_value='50', similarity=0.497), ValueMatch(current_value='72.0', target_value='27', similarity=0.303), ValueMatch(current_value='44.0', target_value='45', similarity=0.283)]\n",
+ "source: Age target: relationship_age_at_diagnosis score: 10.068000000000001\n",
+ "value_matches: [ValueMatch(current_value='55.0', target_value='55', similarity=0.727), ValueMatch(current_value='45.0', target_value='45', similarity=0.689), ValueMatch(current_value='75.0', target_value='75', similarity=0.683), ValueMatch(current_value='65.0', target_value='65', similarity=0.677), ValueMatch(current_value='44.0', target_value='40', similarity=0.671), ValueMatch(current_value='77.0', target_value='70', similarity=0.652), ValueMatch(current_value='70.0', target_value='70', similarity=0.627), ValueMatch(current_value='66.0', target_value='60', similarity=0.622), ValueMatch(current_value='60.0', target_value='60', similarity=0.601), ValueMatch(current_value='50.0', target_value='50', similarity=0.596), ValueMatch(current_value='53.0', target_value='30', similarity=0.574), ValueMatch(current_value='63.0', target_value='30', similarity=0.571), ValueMatch(current_value='73.0', target_value='30', similarity=0.567), ValueMatch(current_value='64.0', target_value='40', similarity=0.562), ValueMatch(current_value='83.0', target_value='30', similarity=0.562), ValueMatch(current_value='74.0', target_value='40', similarity=0.558), ValueMatch(current_value='58.0', target_value='80', similarity=0.546), ValueMatch(current_value='68.0', target_value='80', similarity=0.544), ValueMatch(current_value='57.0', target_value='70', similarity=0.543), ValueMatch(current_value='67.0', target_value='70', similarity=0.541), ValueMatch(current_value='78.0', target_value='80', similarity=0.54), ValueMatch(current_value='48.0', target_value='80', similarity=0.536), ValueMatch(current_value='38.0', target_value='80', similarity=0.527), ValueMatch(current_value='56.0', target_value='60', similarity=0.512), ValueMatch(current_value='76.0', target_value='60', similarity=0.506), ValueMatch(current_value='46.0', target_value='60', similarity=0.502), ValueMatch(current_value='86.0', target_value='60', similarity=0.5), ValueMatch(current_value='85.0', target_value='50', 
similarity=0.494), ValueMatch(current_value='42.0', target_value='24', similarity=0.339)]\n",
+ "source: Age target: age_at_onset score: 16.569\n",
+ "value_matches: [ValueMatch(current_value='55.0', target_value='55', similarity=0.728), ValueMatch(current_value='45.0', target_value='45', similarity=0.689), ValueMatch(current_value='85.0', target_value='85', similarity=0.689), ValueMatch(current_value='75.0', target_value='75', similarity=0.684), ValueMatch(current_value='90.0', target_value='90', similarity=0.682), ValueMatch(current_value='65.0', target_value='65', similarity=0.677), ValueMatch(current_value='44.0', target_value='40', similarity=0.671), ValueMatch(current_value='77.0', target_value='70', similarity=0.651), ValueMatch(current_value='70.0', target_value='70', similarity=0.626), ValueMatch(current_value='66.0', target_value='60', similarity=0.622), ValueMatch(current_value='59.0', target_value='90', similarity=0.609), ValueMatch(current_value='69.0', target_value='90', similarity=0.607), ValueMatch(current_value='60.0', target_value='60', similarity=0.599), ValueMatch(current_value='50.0', target_value='50', similarity=0.594), ValueMatch(current_value='53.0', target_value='30', similarity=0.573), ValueMatch(current_value='63.0', target_value='30', similarity=0.571), ValueMatch(current_value='73.0', target_value='30', similarity=0.567), ValueMatch(current_value='83.0', target_value='30', similarity=0.563), ValueMatch(current_value='64.0', target_value='40', similarity=0.561), ValueMatch(current_value='74.0', target_value='40', similarity=0.557), ValueMatch(current_value='57.0', target_value='70', similarity=0.543), ValueMatch(current_value='58.0', target_value='80', similarity=0.54), ValueMatch(current_value='67.0', target_value='70', similarity=0.54), ValueMatch(current_value='68.0', target_value='80', similarity=0.538), ValueMatch(current_value='78.0', target_value='80', similarity=0.534), ValueMatch(current_value='48.0', target_value='80', similarity=0.531), ValueMatch(current_value='38.0', target_value='80', similarity=0.521), ValueMatch(current_value='56.0', target_value='60', 
similarity=0.511), ValueMatch(current_value='76.0', target_value='60', similarity=0.505), ValueMatch(current_value='46.0', target_value='60', similarity=0.502), ValueMatch(current_value='86.0', target_value='60', similarity=0.502), ValueMatch(current_value='42.0', target_value='24', similarity=0.354)]\n",
+ "source: Age target: age_at_last_exposure score: 18.641\n",
+ "value_matches: [ValueMatch(current_value='70.0', target_value='70.0', similarity=1.0), ValueMatch(current_value='58.0', target_value='55.8', similarity=0.467), ValueMatch(current_value='55.0', target_value='55.8', similarity=0.466), ValueMatch(current_value='59.0', target_value='65.9', similarity=0.448), ValueMatch(current_value='53.0', target_value='75.3', similarity=0.432), ValueMatch(current_value='75.0', target_value='75.3', similarity=0.409), ValueMatch(current_value='77.0', target_value='70.0', similarity=0.408), ValueMatch(current_value='85.0', target_value='85.1', similarity=0.403), ValueMatch(current_value='45.0', target_value='45.2', similarity=0.399), ValueMatch(current_value='65.0', target_value='65.9', similarity=0.395), ValueMatch(current_value='90.0', target_value='90.4', similarity=0.393), ValueMatch(current_value='68.0', target_value='80.6', similarity=0.375), ValueMatch(current_value='57.0', target_value='35.7', similarity=0.369), ValueMatch(current_value='44.0', target_value='40.3', similarity=0.362), ValueMatch(current_value='67.0', target_value='70.0', similarity=0.348), ValueMatch(current_value='69.0', target_value='90.4', similarity=0.345), ValueMatch(current_value='50.0', target_value='70.0', similarity=0.342), ValueMatch(current_value='60.0', target_value='70.0', similarity=0.34), ValueMatch(current_value='63.0', target_value='30.5', similarity=0.316), ValueMatch(current_value='73.0', target_value='30.5', similarity=0.314), ValueMatch(current_value='83.0', target_value='30.5', similarity=0.31), ValueMatch(current_value='64.0', target_value='40.3', similarity=0.305), ValueMatch(current_value='74.0', target_value='40.3', similarity=0.303), ValueMatch(current_value='78.0', target_value='80.6', similarity=0.299), ValueMatch(current_value='48.0', target_value='80.6', similarity=0.296), ValueMatch(current_value='38.0', target_value='80.6', similarity=0.293), ValueMatch(current_value='66.0', target_value='60.1', similarity=0.292)]\n",
+ "source: Age target: initial_weight score: 10.428999999999998\n",
+ "value_matches: [ValueMatch(current_value='59.0', target_value='59', similarity=0.689), ValueMatch(current_value='41.0', target_value='41', similarity=0.688), ValueMatch(current_value='85.0', target_value='85', similarity=0.685), ValueMatch(current_value='78.0', target_value='78', similarity=0.683), ValueMatch(current_value='53.0', target_value='53', similarity=0.682), ValueMatch(current_value='72.0', target_value='72', similarity=0.681), ValueMatch(current_value='65.0', target_value='65', similarity=0.675), ValueMatch(current_value='55.0', target_value='5', similarity=0.528), ValueMatch(current_value='61.0', target_value='1', similarity=0.361), ValueMatch(current_value='71.0', target_value='1', similarity=0.36), ValueMatch(current_value='50.0', target_value='5', similarity=0.302), ValueMatch(current_value='56.0', target_value='5', similarity=0.284), ValueMatch(current_value='75.0', target_value='5', similarity=0.283), ValueMatch(current_value='45.0', target_value='5', similarity=0.281), ValueMatch(current_value='58.0', target_value='5', similarity=0.279), ValueMatch(current_value='57.0', target_value='5', similarity=0.278), ValueMatch(current_value='74.0', target_value='47', similarity=0.264), ValueMatch(current_value='44.0', target_value='41', similarity=0.262)]\n",
+ "source: Age target: age_at_index score: 8.264999999999999\n",
+ "value_matches: [ValueMatch(current_value='73.0', target_value='-7300', similarity=0.81), ValueMatch(current_value='57.0', target_value='-6570', similarity=0.776), ValueMatch(current_value='38.0', target_value='-4380', similarity=0.739), ValueMatch(current_value='41.0', target_value='-12410', similarity=0.662), ValueMatch(current_value='46.0', target_value='-14600', similarity=0.658), ValueMatch(current_value='60.0', target_value='-14600', similarity=0.585), ValueMatch(current_value='68.0', target_value='-11680', similarity=0.578), ValueMatch(current_value='55.0', target_value='-9855', similarity=0.388), ValueMatch(current_value='75.0', target_value='-6575', similarity=0.375), ValueMatch(current_value='65.0', target_value='-6575', similarity=0.367), ValueMatch(current_value='85.0', target_value='-9855', similarity=0.353), ValueMatch(current_value='83.0', target_value='-8030', similarity=0.331), ValueMatch(current_value='90.0', target_value='-2190', similarity=0.302), ValueMatch(current_value='67.0', target_value='-6570', similarity=0.3), ValueMatch(current_value='70.0', target_value='-7300', similarity=0.298), ValueMatch(current_value='61.0', target_value='-5110', similarity=0.295), ValueMatch(current_value='71.0', target_value='-5110', similarity=0.293), ValueMatch(current_value='62.0', target_value='-2920', similarity=0.293), ValueMatch(current_value='72.0', target_value='-2920', similarity=0.291), ValueMatch(current_value='42.0', target_value='-2920', similarity=0.288), ValueMatch(current_value='77.0', target_value='-6570', similarity=0.286), ValueMatch(current_value='48.0', target_value='-4380', similarity=0.28), ValueMatch(current_value='63.0', target_value='-7300', similarity=0.268), ValueMatch(current_value='53.0', target_value='-7300', similarity=0.267), ValueMatch(current_value='69.0', target_value='-2190', similarity=0.263), ValueMatch(current_value='59.0', target_value='-2190', similarity=0.263)]\n",
+ "source: Age target: days_to_birth score: 10.608999999999998\n",
+ "value_matches: [ValueMatch(current_value='66.0', target_value='66.7', similarity=0.476), ValueMatch(current_value='78.0', target_value='78.8', similarity=0.458), ValueMatch(current_value='77.0', target_value='77.9', similarity=0.457), ValueMatch(current_value='67.0', target_value='66.7', similarity=0.442), ValueMatch(current_value='83.0', target_value='83.7', similarity=0.439), ValueMatch(current_value='72.0', target_value='72.5', similarity=0.43), ValueMatch(current_value='69.0', target_value='69.5', similarity=0.423), ValueMatch(current_value='65.0', target_value='65.4', similarity=0.399), ValueMatch(current_value='68.0', target_value='68.9', similarity=0.395), ValueMatch(current_value='74.0', target_value='74.3', similarity=0.395), ValueMatch(current_value='75.0', target_value='75.2', similarity=0.39), ValueMatch(current_value='70.0', target_value='70.3', similarity=0.314), ValueMatch(current_value='58.0', target_value='80.1', similarity=0.267), ValueMatch(current_value='57.0', target_value='70.3', similarity=0.266), ValueMatch(current_value='48.0', target_value='80.1', similarity=0.264), ValueMatch(current_value='38.0', target_value='80.1', similarity=0.26)]\n",
+ "source: Age target: fev1_fvc_pre_bronch_percent score: 6.075\n",
+ "value_matches: [ValueMatch(current_value='85.0', target_value='85.0', similarity=1.0), ValueMatch(current_value='77.0', target_value='77.4', similarity=0.477), ValueMatch(current_value='74.0', target_value='77.4', similarity=0.459), ValueMatch(current_value='83.0', target_value='83.6', similarity=0.437), ValueMatch(current_value='72.0', target_value='72.6', similarity=0.431), ValueMatch(current_value='69.0', target_value='69.7', similarity=0.43), ValueMatch(current_value='62.0', target_value='62.8', similarity=0.422), ValueMatch(current_value='67.0', target_value='67.5', similarity=0.408), ValueMatch(current_value='75.0', target_value='67.5', similarity=0.403), ValueMatch(current_value='64.0', target_value='64.3', similarity=0.393), ValueMatch(current_value='68.0', target_value='68.9', similarity=0.388), ValueMatch(current_value='90.0', target_value='90.3', similarity=0.379), ValueMatch(current_value='55.0', target_value='85.0', similarity=0.344), ValueMatch(current_value='50.0', target_value='85.0', similarity=0.333), ValueMatch(current_value='59.0', target_value='90.3', similarity=0.332), ValueMatch(current_value='70.0', target_value='70.1', similarity=0.309), ValueMatch(current_value='65.0', target_value='85.0', similarity=0.284), ValueMatch(current_value='45.0', target_value='85.0', similarity=0.278), ValueMatch(current_value='57.0', target_value='70.1', similarity=0.263), ValueMatch(current_value='71.0', target_value='70.1', similarity=0.262)]\n",
+ "source: Age target: fev1_fvc_post_bronch_percent score: 8.032000000000002\n",
+ "value_matches: [ValueMatch(current_value='90.0', target_value='90.0', similarity=1.0), ValueMatch(current_value='75.0', target_value='175.0', similarity=0.712), ValueMatch(current_value='55.0', target_value='55.4', similarity=0.472), ValueMatch(current_value='53.0', target_value='75.3', similarity=0.452), ValueMatch(current_value='59.0', target_value='90.0', similarity=0.414), ValueMatch(current_value='69.0', target_value='90.0', similarity=0.412), ValueMatch(current_value='45.0', target_value='45.2', similarity=0.411), ValueMatch(current_value='56.0', target_value='25.6', similarity=0.396), ValueMatch(current_value='74.0', target_value='40.7', similarity=0.389), ValueMatch(current_value='57.0', target_value='95.7', similarity=0.386), ValueMatch(current_value='41.0', target_value='110.4', similarity=0.386), ValueMatch(current_value='44.0', target_value='40.7', similarity=0.368), ValueMatch(current_value='86.0', target_value='60.8', similarity=0.364), ValueMatch(current_value='63.0', target_value='30.0', similarity=0.359), ValueMatch(current_value='73.0', target_value='30.0', similarity=0.358), ValueMatch(current_value='83.0', target_value='30.0', similarity=0.353), ValueMatch(current_value='71.0', target_value='110.4', similarity=0.323), ValueMatch(current_value='61.0', target_value='110.4', similarity=0.323), ValueMatch(current_value='66.0', target_value='60.8', similarity=0.31), ValueMatch(current_value='64.0', target_value='40.7', similarity=0.308), ValueMatch(current_value='50.0', target_value='30.0', similarity=0.306), ValueMatch(current_value='60.0', target_value='60.8', similarity=0.304), ValueMatch(current_value='70.0', target_value='30.0', similarity=0.291), ValueMatch(current_value='62.0', target_value='205.1', similarity=0.263), ValueMatch(current_value='72.0', target_value='205.1', similarity=0.262), ValueMatch(current_value='42.0', target_value='205.1', similarity=0.26), ValueMatch(current_value='76.0', target_value='60.8', similarity=0.252), 
ValueMatch(current_value='46.0', target_value='60.8', similarity=0.25)]\n",
+ "source: Age target: gross_tumor_weight score: 10.684000000000001\n",
+ "value_matches: []\n",
+ "source: Age target: demographics score: 0\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Unknown', similarity=0.289)]\n",
+ "source: Age target: menopause_status score: 0.289\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Anemia', similarity=0.342)]\n",
+ "source: Age target: comorbidities score: 0.342\n",
+ "value_matches: [ValueMatch(current_value='50.0', target_value='500', similarity=1.0), ValueMatch(current_value='45.0', target_value='450', similarity=1.0), ValueMatch(current_value='55.0', target_value='550', similarity=1.0), ValueMatch(current_value='75.0', target_value='75', similarity=0.707), ValueMatch(current_value='90.0', target_value='90', similarity=0.699), ValueMatch(current_value='59.0', target_value='90', similarity=0.608), ValueMatch(current_value='69.0', target_value='90', similarity=0.602), ValueMatch(current_value='65.0', target_value='50', similarity=0.448), ValueMatch(current_value='44.0', target_value='400', similarity=0.443), ValueMatch(current_value='85.0', target_value='50', similarity=0.44), ValueMatch(current_value='62.0', target_value='220', similarity=0.388), ValueMatch(current_value='72.0', target_value='220', similarity=0.386), ValueMatch(current_value='42.0', target_value='220', similarity=0.383), ValueMatch(current_value='53.0', target_value='300', similarity=0.381), ValueMatch(current_value='63.0', target_value='300', similarity=0.378), ValueMatch(current_value='73.0', target_value='300', similarity=0.376), ValueMatch(current_value='83.0', target_value='300', similarity=0.372), ValueMatch(current_value='64.0', target_value='400', similarity=0.371), ValueMatch(current_value='74.0', target_value='400', similarity=0.369), ValueMatch(current_value='60.0', target_value='500', similarity=0.298), ValueMatch(current_value='70.0', target_value='500', similarity=0.289), ValueMatch(current_value='58.0', target_value='180', similarity=0.288), ValueMatch(current_value='68.0', target_value='180', similarity=0.285), ValueMatch(current_value='78.0', target_value='180', similarity=0.283), ValueMatch(current_value='48.0', target_value='180', similarity=0.281), ValueMatch(current_value='38.0', target_value='180', similarity=0.277), ValueMatch(current_value='77.0', target_value='75', similarity=0.275), ValueMatch(current_value='57.0', 
target_value='75', similarity=0.256)]\n",
+ "source: Age target: current_weight score: 12.883\n",
+ "value_matches: [ValueMatch(current_value='50.0', target_value='50.0', similarity=1.0), ValueMatch(current_value='44.0', target_value='34.4', similarity=0.502), ValueMatch(current_value='55.0', target_value='55.6', similarity=0.497), ValueMatch(current_value='56.0', target_value='55.6', similarity=0.462), ValueMatch(current_value='41.0', target_value='41.8', similarity=0.45), ValueMatch(current_value='83.0', target_value='68.3', similarity=0.446), ValueMatch(current_value='53.0', target_value='45.3', similarity=0.431), ValueMatch(current_value='45.0', target_value='45.3', similarity=0.425), ValueMatch(current_value='72.0', target_value='87.2', similarity=0.423), ValueMatch(current_value='68.0', target_value='68.3', similarity=0.419), ValueMatch(current_value='62.0', target_value='62.1', similarity=0.407), ValueMatch(current_value='74.0', target_value='74.9', similarity=0.4), ValueMatch(current_value='90.0', target_value='29.0', similarity=0.393), ValueMatch(current_value='58.0', target_value='80.5', similarity=0.361), ValueMatch(current_value='60.0', target_value='50.0', similarity=0.358), ValueMatch(current_value='70.0', target_value='50.0', similarity=0.348), ValueMatch(current_value='59.0', target_value='29.0', similarity=0.343), ValueMatch(current_value='69.0', target_value='29.0', similarity=0.343), ValueMatch(current_value='65.0', target_value='50.0', similarity=0.323), ValueMatch(current_value='75.0', target_value='50.0', similarity=0.32), ValueMatch(current_value='85.0', target_value='50.0', similarity=0.319), ValueMatch(current_value='78.0', target_value='80.5', similarity=0.282), ValueMatch(current_value='48.0', target_value='80.5', similarity=0.281), ValueMatch(current_value='38.0', target_value='80.5', similarity=0.278)]\n",
+ "source: Age target: epithelioid_cell_percent score: 9.811000000000003\n",
+ "value_matches: [ValueMatch(current_value='85.0', target_value='85.0', similarity=1.0), ValueMatch(current_value='58.0', target_value='58.8', similarity=0.473), ValueMatch(current_value='74.0', target_value='74.5', similarity=0.433), ValueMatch(current_value='63.0', target_value='63.4', similarity=0.427), ValueMatch(current_value='44.0', target_value='40.0', similarity=0.42), ValueMatch(current_value='56.0', target_value='45.6', similarity=0.418), ValueMatch(current_value='69.0', target_value='69.1', similarity=0.416), ValueMatch(current_value='73.0', target_value='97.3', similarity=0.415), ValueMatch(current_value='45.0', target_value='45.6', similarity=0.407), ValueMatch(current_value='90.0', target_value='90.8', similarity=0.381), ValueMatch(current_value='64.0', target_value='40.0', similarity=0.361), ValueMatch(current_value='50.0', target_value='40.0', similarity=0.335), ValueMatch(current_value='60.0', target_value='40.0', similarity=0.332), ValueMatch(current_value='59.0', target_value='90.8', similarity=0.331), ValueMatch(current_value='55.0', target_value='85.0', similarity=0.324), ValueMatch(current_value='70.0', target_value='40.0', similarity=0.323), ValueMatch(current_value='65.0', target_value='85.0', similarity=0.269), ValueMatch(current_value='75.0', target_value='85.0', similarity=0.267)]\n",
+ "source: Age target: percent_tumor_invasion score: 7.332000000000001\n",
+ "value_matches: [ValueMatch(current_value='42.0', target_value='2024', similarity=0.319), ValueMatch(current_value='62.0', target_value='2020', similarity=0.316), ValueMatch(current_value='72.0', target_value='2020', similarity=0.313)]\n",
+ "source: Age target: year_of_death score: 0.948\n",
+ "value_matches: [ValueMatch(current_value='73.0', target_value='730', similarity=1.0), ValueMatch(current_value='65.0', target_value='-3650', similarity=0.766), ValueMatch(current_value='90.0', target_value='90', similarity=0.676), ValueMatch(current_value='45.0', target_value='-14500', similarity=0.656), ValueMatch(current_value='69.0', target_value='90', similarity=0.587), ValueMatch(current_value='59.0', target_value='90', similarity=0.586), ValueMatch(current_value='50.0', target_value='-14500', similarity=0.579), ValueMatch(current_value='72.0', target_value='32872', similarity=0.374), ValueMatch(current_value='60.0', target_value='0', similarity=0.328), ValueMatch(current_value='70.0', target_value='0', similarity=0.32), ValueMatch(current_value='63.0', target_value='730', similarity=0.31), ValueMatch(current_value='53.0', target_value='730', similarity=0.309), ValueMatch(current_value='83.0', target_value='730', similarity=0.307), ValueMatch(current_value='58.0', target_value='180', similarity=0.281), ValueMatch(current_value='68.0', target_value='180', similarity=0.281), ValueMatch(current_value='78.0', target_value='180', similarity=0.281), ValueMatch(current_value='38.0', target_value='180', similarity=0.279), ValueMatch(current_value='48.0', target_value='180', similarity=0.275), ValueMatch(current_value='55.0', target_value='-3650', similarity=0.262)]\n",
+ "source: Age target: days_to_last_known_disease_status score: 8.457\n",
+ "value_matches: [ValueMatch(current_value='55.0', target_value='550X', similarity=0.859), ValueMatch(current_value='45.0', target_value='450X', similarity=0.845), ValueMatch(current_value='65.0', target_value='650X', similarity=0.839), ValueMatch(current_value='70.0', target_value='700X', similarity=0.834), ValueMatch(current_value='60.0', target_value='600X', similarity=0.821), ValueMatch(current_value='50.0', target_value='500X', similarity=0.811), ValueMatch(current_value='44.0', target_value='400X', similarity=0.382), ValueMatch(current_value='77.0', target_value='700X', similarity=0.367), ValueMatch(current_value='61.0', target_value='100X', similarity=0.357), ValueMatch(current_value='62.0', target_value='200X', similarity=0.357), ValueMatch(current_value='72.0', target_value='200X', similarity=0.353), ValueMatch(current_value='71.0', target_value='100X', similarity=0.353), ValueMatch(current_value='42.0', target_value='200X', similarity=0.351), ValueMatch(current_value='41.0', target_value='100X', similarity=0.351), ValueMatch(current_value='63.0', target_value='30X', similarity=0.345), ValueMatch(current_value='53.0', target_value='30X', similarity=0.345), ValueMatch(current_value='66.0', target_value='60X', similarity=0.344), ValueMatch(current_value='73.0', target_value='30X', similarity=0.341), ValueMatch(current_value='83.0', target_value='30X', similarity=0.335), ValueMatch(current_value='64.0', target_value='400X', similarity=0.322), ValueMatch(current_value='74.0', target_value='400X', similarity=0.318), ValueMatch(current_value='57.0', target_value='700X', similarity=0.308), ValueMatch(current_value='67.0', target_value='700X', similarity=0.307), ValueMatch(current_value='56.0', target_value='60X', similarity=0.279), ValueMatch(current_value='76.0', target_value='60X', similarity=0.274), ValueMatch(current_value='46.0', target_value='60X', similarity=0.272), ValueMatch(current_value='86.0', target_value='60X', similarity=0.27)]\n",
+ "source: Age target: mean_coverage score: 11.94\n",
+ "Top k reranked columns: [('Age', 'age_at_last_exposure', 18.641), ('Age', 'age_at_onset', 16.569), ('Age', 'current_weight', 12.883), ('Age', 'mean_coverage', 11.94), ('Age', 'gross_tumor_weight', 10.684000000000001), ('Age', 'days_to_birth', 10.608999999999998), ('Age', 'initial_weight', 10.428999999999998), ('Age', 'age_at_diagnosis', 10.322), ('Age', 'relationship_age_at_diagnosis', 10.068000000000001), ('Age', 'epithelioid_cell_percent', 9.811000000000003), ('Age', 'days_to_last_known_disease_status', 8.457), ('Age', 'age_at_index', 8.264999999999999), ('Age', 'fev1_fvc_post_bronch_percent', 8.032000000000002), ('Age', 'percent_tumor_invasion', 7.332000000000001), ('Age', 'weight', 7.3069999999999995), ('Age', 'fev1_fvc_pre_bronch_percent', 6.075), ('Age', 'year_of_death', 0.948), ('Age', 'comorbidities', 0.342), ('Age', 'menopause_status', 0.289), ('Age', 'demographics', 0)]\n",
+ "\n",
+ "Source column: Race\n",
+ "value_matches: [ValueMatch(current_value='White', target_value='white', similarity=1.0), ValueMatch(current_value=' White', target_value='white', similarity=1.0), ValueMatch(current_value='Not Reported', target_value='not reported', similarity=1.0), ValueMatch(current_value='Asian', target_value='asian', similarity=1.0), ValueMatch(current_value='Black or African American', target_value='black or african american', similarity=1.0), ValueMatch(current_value='nan', target_value='american indian or alaska native', similarity=0.353)]\n",
+ "source: Race target: race score: 5.353\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='not reported', similarity=1.0), ValueMatch(current_value='Black or African American', target_value='hispanic or latino', similarity=0.311)]\n",
+ "source: Race target: ethnicity score: 1.311\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='Not Reported', similarity=1.0), ValueMatch(current_value='White', target_value='White Matter', similarity=0.66), ValueMatch(current_value=' White', target_value='White Matter', similarity=0.66)]\n",
+ "source: Race target: supratentorial_localization score: 2.3200000000000003\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='Unrelated', similarity=0.291)]\n",
+ "source: Race target: relationship_type score: 0.291\n",
+ "value_matches: [ValueMatch(current_value='Asian', target_value='Malaysia', similarity=0.334), ValueMatch(current_value='Not Reported', target_value='Federated States of Micronesia', similarity=0.3), ValueMatch(current_value='nan', target_value='China', similarity=0.264)]\n",
+ "source: Race target: country_of_birth score: 0.898\n",
+ "value_matches: [ValueMatch(current_value='Black or African American', target_value='Family History of Cancer', similarity=0.335)]\n",
+ "source: Race target: demographics score: 0.335\n",
+ "value_matches: []\n",
+ "source: Race target: well_number score: 0\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='Not Reported', similarity=1.0)]\n",
+ "source: Race target: eye_color score: 1.0\n",
+ "value_matches: [ValueMatch(current_value='Asian', target_value='Not Classified', similarity=0.317), ValueMatch(current_value='Not Reported', target_value='Not Classified', similarity=0.267)]\n",
+ "source: Race target: fab_morphology_code score: 0.5840000000000001\n",
+ "value_matches: []\n",
+ "source: Race target: plate_well score: 0\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='Not Reported', similarity=1.0)]\n",
+ "source: Race target: education_level score: 1.0\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='not reported', similarity=1.0)]\n",
+ "source: Race target: gender score: 1.0\n",
+ "value_matches: [ValueMatch(current_value='Black or African American', target_value='France', similarity=0.281), ValueMatch(current_value='Asian', target_value='Saudi Arabia', similarity=0.262)]\n",
+ "source: Race target: country_of_residence_at_enrollment score: 0.543\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='Not Reported', similarity=1.0)]\n",
+ "source: Race target: child_pugh_classification score: 1.0\n",
+ "value_matches: [ValueMatch(current_value='White', target_value='BWH', similarity=0.279), ValueMatch(current_value=' White', target_value='BWH', similarity=0.279)]\n",
+ "source: Race target: short_name score: 0.558\n",
+ "value_matches: [ValueMatch(current_value='Black or African American', target_value='Respirable Crystalline Silica', similarity=0.307), ValueMatch(current_value='nan', target_value='Marijuana', similarity=0.292)]\n",
+ "source: Race target: exposure_type score: 0.599\n",
+ "value_matches: [ValueMatch(current_value='Not Reported', target_value='Not Reported', similarity=1.0)]\n",
+ "source: Race target: alcohol_type score: 1.0\n",
+ "value_matches: []\n",
+ "source: Race target: base_caller_version score: 0\n",
+ "value_matches: [ValueMatch(current_value='Black or African American', target_value='Melanocytic', similarity=0.261)]\n",
+ "source: Race target: roots score: 0.261\n",
+ "value_matches: [ValueMatch(current_value='Black or African American', target_value='Technical Variation Minimization', similarity=0.253)]\n",
+ "source: Race target: methylation_array_harmonization_workflows score: 0.253\n",
+ "Top k reranked columns: [('Race', 'race', 5.353), ('Race', 'supratentorial_localization', 2.3200000000000003), ('Race', 'ethnicity', 1.311), ('Race', 'eye_color', 1.0), ('Race', 'education_level', 1.0), ('Race', 'gender', 1.0), ('Race', 'child_pugh_classification', 1.0), ('Race', 'alcohol_type', 1.0), ('Race', 'country_of_birth', 0.898), ('Race', 'exposure_type', 0.599), ('Race', 'fab_morphology_code', 0.5840000000000001), ('Race', 'short_name', 0.558), ('Race', 'country_of_residence_at_enrollment', 0.543), ('Race', 'demographics', 0.335), ('Race', 'relationship_type', 0.291), ('Race', 'roots', 0.261), ('Race', 'methylation_array_harmonization_workflows', 0.253), ('Race', 'well_number', 0), ('Race', 'plate_well', 0), ('Race', 'base_caller_version', 0)]\n",
+ "\n",
+ "Source column: Ethnicity\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='not reported', similarity=1.0), ValueMatch(current_value='Hispanic or Latino', target_value='hispanic or latino', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='not hispanic or latino', similarity=0.937)]\n",
+ "source: Ethnicity target: ethnicity score: 2.9370000000000003\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='not reported', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='native hawaiian or other pacific islander', similarity=0.453), ValueMatch(current_value='Hispanic or Latino', target_value='native hawaiian or other pacific islander', similarity=0.446), ValueMatch(current_value='nan', target_value='american indian or alaska native', similarity=0.334)]\n",
+ "source: Ethnicity target: race score: 2.233\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='not reported', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='not reported', similarity=0.307)]\n",
+ "source: Ethnicity target: gender score: 1.307\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='not reported', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='not reported', similarity=0.307)]\n",
+ "source: Ethnicity target: relationship_gender score: 1.307\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='Federated States of Micronesia', similarity=0.314), ValueMatch(current_value='Not-Hispanic or Latino', target_value='State of Palestine', similarity=0.289), ValueMatch(current_value='Hispanic or Latino', target_value='State of Palestine', similarity=0.289), ValueMatch(current_value='nan', target_value='China', similarity=0.263)]\n",
+ "source: Ethnicity target: country_of_birth score: 1.1549999999999998\n",
+ "value_matches: [ValueMatch(current_value='Hispanic or Latino', target_value='Geographic Location', similarity=0.278), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Geographic Location', similarity=0.265)]\n",
+ "source: Ethnicity target: demographics score: 0.543\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='Not Reported', similarity=1.0), ValueMatch(current_value='Hispanic or Latino', target_value='Spinal Cord', similarity=0.284), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Spinal Cord', similarity=0.265)]\n",
+ "source: Ethnicity target: supratentorial_localization score: 1.549\n",
+ "value_matches: []\n",
+ "source: Ethnicity target: well_number score: 0\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='Not Reported', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Not Reported', similarity=0.293)]\n",
+ "source: Ethnicity target: eye_color score: 1.293\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='Not Reported', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Not Reported', similarity=0.31)]\n",
+ "source: Ethnicity target: child_pugh_classification score: 1.31\n",
+ "value_matches: []\n",
+ "source: Ethnicity target: ajcc_pathologic_n score: 0\n",
+ "value_matches: [ValueMatch(current_value='Not-Hispanic or Latino', target_value='Not Classified', similarity=0.378), ValueMatch(current_value='Hispanic or Latino', target_value='Not Classified', similarity=0.34), ValueMatch(current_value='Not reported', target_value='Not Classified', similarity=0.262)]\n",
+ "source: Ethnicity target: fab_morphology_code score: 0.98\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='Unrelated', similarity=0.317)]\n",
+ "source: Ethnicity target: relationship_type score: 0.317\n",
+ "value_matches: []\n",
+ "source: Ethnicity target: plate_well score: 0\n",
+ "value_matches: []\n",
+ "source: Ethnicity target: ajcc_clinical_n score: 0\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='Separated', similarity=0.311), ValueMatch(current_value='Hispanic or Latino', target_value='Domestic Partnership', similarity=0.295), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Domestic Partnership', similarity=0.283)]\n",
+ "source: Ethnicity target: marital_status score: 0.889\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='not reported', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='no', similarity=0.361), ValueMatch(current_value='Hispanic or Latino', target_value='no', similarity=0.254)]\n",
+ "source: Ethnicity target: relative_with_cancer_history score: 1.615\n",
+ "value_matches: [ValueMatch(current_value='Not reported', target_value='Not Reported', similarity=1.0), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Not Reported', similarity=0.299)]\n",
+ "source: Ethnicity target: histone_family score: 1.299\n",
+ "value_matches: [ValueMatch(current_value='Hispanic or Latino', target_value='Occupational', similarity=0.32), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Occupational', similarity=0.309)]\n",
+ "source: Ethnicity target: exposure_source score: 0.629\n",
+ "value_matches: [ValueMatch(current_value='Hispanic or Latino', target_value='Kiribati', similarity=0.291), ValueMatch(current_value='Not-Hispanic or Latino', target_value='Kiribati', similarity=0.259)]\n",
+ "source: Ethnicity target: country_of_residence_at_enrollment score: 0.55\n",
+ "Top k reranked columns: [('Ethnicity', 'ethnicity', 2.9370000000000003), ('Ethnicity', 'race', 2.233), ('Ethnicity', 'relative_with_cancer_history', 1.615), ('Ethnicity', 'supratentorial_localization', 1.549), ('Ethnicity', 'child_pugh_classification', 1.31), ('Ethnicity', 'gender', 1.307), ('Ethnicity', 'relationship_gender', 1.307), ('Ethnicity', 'histone_family', 1.299), ('Ethnicity', 'eye_color', 1.293), ('Ethnicity', 'country_of_birth', 1.1549999999999998), ('Ethnicity', 'fab_morphology_code', 0.98), ('Ethnicity', 'marital_status', 0.889), ('Ethnicity', 'exposure_source', 0.629), ('Ethnicity', 'country_of_residence_at_enrollment', 0.55), ('Ethnicity', 'demographics', 0.543), ('Ethnicity', 'relationship_type', 0.317), ('Ethnicity', 'well_number', 0), ('Ethnicity', 'ajcc_pathologic_n', 0), ('Ethnicity', 'plate_well', 0), ('Ethnicity', 'ajcc_clinical_n', 0)]\n",
+ "\n",
+ "Source column: Gender\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='female', similarity=1.0)]\n",
+ "source: Gender target: gender score: 1.0\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='female', similarity=1.0)]\n",
+ "source: Gender target: relationship_gender score: 1.0\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='No', similarity=0.273)]\n",
+ "source: Gender target: premature_at_birth score: 0.273\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='No', similarity=0.273)]\n",
+ "source: Gender target: pregnant_at_diagnosis score: 0.273\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Ectopic Pregnancy', similarity=0.304)]\n",
+ "source: Gender target: pregnancy_outcome score: 0.304\n",
+ "value_matches: []\n",
+ "source: Gender target: marital_status score: 0\n",
+ "value_matches: []\n",
+ "source: Gender target: menopause_status score: 0\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='Maternal Half Sister', similarity=0.27)]\n",
+ "source: Gender target: relationship_type score: 0.27\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='False', similarity=0.276)]\n",
+ "source: Gender target: age_is_obfuscated score: 0.276\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='No', similarity=0.273)]\n",
+ "source: Gender target: tumor_confined_to_organ_of_origin score: 0.273\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='Guatemala', similarity=0.413), ValueMatch(current_value='nan', target_value='Antigua and Barbuda', similarity=0.273)]\n",
+ "source: Gender target: country_of_birth score: 0.6859999999999999\n",
+ "value_matches: []\n",
+ "source: Gender target: hysterectomy_type score: 0\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='Guatemala', similarity=0.42)]\n",
+ "source: Gender target: country_of_residence_at_enrollment score: 0.42\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Anus and anal canal', similarity=0.549), ValueMatch(current_value='Female', target_value='Other and unspecified female genital organs', similarity=0.457)]\n",
+ "source: Gender target: primary_site score: 1.006\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='False', similarity=0.276)]\n",
+ "source: Gender target: selected_normal_wxs score: 0.276\n",
+ "value_matches: []\n",
+ "source: Gender target: weeks_gestation_at_birth score: 0\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='False', similarity=0.276)]\n",
+ "source: Gender target: selected_normal_wgs score: 0.276\n",
+ "value_matches: []\n",
+ "source: Gender target: ethnicity score: 0\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='Alleles', similarity=0.306)]\n",
+ "source: Gender target: variant_type score: 0.306\n",
+ "value_matches: [ValueMatch(current_value='Female', target_value='Left', similarity=0.269)]\n",
+ "source: Gender target: laterality score: 0.269\n",
+ "Top k reranked columns: [('Gender', 'primary_site', 1.006), ('Gender', 'gender', 1.0), ('Gender', 'relationship_gender', 1.0), ('Gender', 'country_of_birth', 0.6859999999999999), ('Gender', 'country_of_residence_at_enrollment', 0.42), ('Gender', 'variant_type', 0.306), ('Gender', 'pregnancy_outcome', 0.304), ('Gender', 'age_is_obfuscated', 0.276), ('Gender', 'selected_normal_wxs', 0.276), ('Gender', 'selected_normal_wgs', 0.276), ('Gender', 'premature_at_birth', 0.273), ('Gender', 'pregnant_at_diagnosis', 0.273), ('Gender', 'tumor_confined_to_organ_of_origin', 0.273), ('Gender', 'relationship_type', 0.27), ('Gender', 'laterality', 0.269), ('Gender', 'marital_status', 0), ('Gender', 'menopause_status', 0), ('Gender', 'hysterectomy_type', 0), ('Gender', 'weeks_gestation_at_birth', 0), ('Gender', 'ethnicity', 0)]\n",
+ "\n",
+ "Source column: Tumor_Focality\n",
+ "value_matches: [ValueMatch(current_value='Unifocal', target_value='Unifocal', similarity=1.0), ValueMatch(current_value='Multifocal', target_value='Multifocal', similarity=1.0)]\n",
+ "source: Tumor_Focality target: tumor_focality score: 2.0\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Unknown', similarity=0.305)]\n",
+ "source: Tumor_Focality target: tumor_shape score: 0.305\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: tumor_depth_descriptor score: 0\n",
+ "value_matches: [ValueMatch(current_value='Unifocal', target_value='Focal', similarity=0.667), ValueMatch(current_value='Multifocal', target_value='Focal', similarity=0.586)]\n",
+ "source: Tumor_Focality target: extracapsular_extension score: 1.2530000000000001\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Many', similarity=0.327)]\n",
+ "source: Tumor_Focality target: tumor_infiltrating_macrophages score: 0.327\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: enneking_msts_tumor_site score: 0\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: tissue_type score: 0\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='No', similarity=0.271)]\n",
+ "source: Tumor_Focality target: history_of_tumor score: 0.271\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: distance_normal_to_tumor score: 0\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: residual_tumor_measurement score: 0\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: wilms_tumor_histologic_subtype score: 0\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: tumor_width_measurement score: 0\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='No', similarity=0.259)]\n",
+ "source: Tumor_Focality target: tumor_confined_to_organ_of_origin score: 0.259\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='No', similarity=0.259)]\n",
+ "source: Tumor_Focality target: non_nodal_tumor_deposits score: 0.259\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: tumor_thickness score: 0\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='No Extranodal Extension', similarity=0.254)]\n",
+ "source: Tumor_Focality target: extranodal_extension score: 0.254\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Many', similarity=0.327)]\n",
+ "source: Tumor_Focality target: tumor_infiltrating_lymphocytes score: 0.327\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: margin_distance score: 0\n",
+ "value_matches: []\n",
+ "source: Tumor_Focality target: tumor_grade_category score: 0\n",
+ "value_matches: [ValueMatch(current_value='nan', target_value='Manta', similarity=0.269)]\n",
+ "source: Tumor_Focality target: somatic_mutation_calling_workflows score: 0.269\n",
+ "Top k reranked columns: [('Tumor_Focality', 'tumor_focality', 2.0), ('Tumor_Focality', 'extracapsular_extension', 1.2530000000000001), ('Tumor_Focality', 'tumor_infiltrating_macrophages', 0.327), ('Tumor_Focality', 'tumor_infiltrating_lymphocytes', 0.327), ('Tumor_Focality', 'tumor_shape', 0.305), ('Tumor_Focality', 'history_of_tumor', 0.271), ('Tumor_Focality', 'somatic_mutation_calling_workflows', 0.269), ('Tumor_Focality', 'tumor_confined_to_organ_of_origin', 0.259), ('Tumor_Focality', 'non_nodal_tumor_deposits', 0.259), ('Tumor_Focality', 'extranodal_extension', 0.254), ('Tumor_Focality', 'tumor_depth_descriptor', 0), ('Tumor_Focality', 'enneking_msts_tumor_site', 0), ('Tumor_Focality', 'tissue_type', 0), ('Tumor_Focality', 'distance_normal_to_tumor', 0), ('Tumor_Focality', 'residual_tumor_measurement', 0), ('Tumor_Focality', 'wilms_tumor_histologic_subtype', 0), ('Tumor_Focality', 'tumor_width_measurement', 0), ('Tumor_Focality', 'tumor_thickness', 0), ('Tumor_Focality', 'margin_distance', 0), ('Tumor_Focality', 'tumor_grade_category', 0)]\n",
+ "\n",
+ "Source column: Tumor_Size_cm\n",
+ "value_matches: [ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='0.9', target_value='0.9', similarity=1.0), ValueMatch(current_value='3.7', target_value='3.7', similarity=1.0), ValueMatch(current_value='8.5', target_value='8.5', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='4.3', target_value='4.3', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='1.8', target_value='1.8', similarity=1.0), ValueMatch(current_value='1.0', target_value='10.4', similarity=0.595), ValueMatch(current_value='9.0', target_value='0.9', similarity=0.452), ValueMatch(current_value='2.7', target_value='7.2', similarity=0.416), ValueMatch(current_value='11.0', target_value='10.4', similarity=0.407), ValueMatch(current_value='4.5', target_value='5.4', similarity=0.395), ValueMatch(current_value='5.2', target_value='2.5', similarity=0.387), ValueMatch(current_value='3.5', target_value='3.3', similarity=0.331), ValueMatch(current_value='5.5', target_value='2.5', similarity=0.331), ValueMatch(current_value='2.2', target_value='2.5', similarity=0.331), ValueMatch(current_value='1.3', target_value='3.3', similarity=0.329), ValueMatch(current_value='2.6', target_value='6.0', similarity=0.318), ValueMatch(current_value='6.3', target_value='6.0', similarity=0.318), ValueMatch(current_value='6.5', target_value='6.0', similarity=0.318), ValueMatch(current_value='3.2', target_value='3.3', similarity=0.312), ValueMatch(current_value='3.0', target_value='3.3', similarity=0.312), ValueMatch(current_value='2.3', target_value='3.3', similarity=0.312), ValueMatch(current_value='3.8', target_value='3.3', similarity=0.299), ValueMatch(current_value='3.9', target_value='3.3', similarity=0.296), ValueMatch(current_value='9.5', target_value='0.9', similarity=0.279), ValueMatch(current_value='2.9', target_value='0.9', similarity=0.279), 
ValueMatch(current_value='1.4', target_value='10.4', similarity=0.271), ValueMatch(current_value='4.0', target_value='10.4', similarity=0.266), ValueMatch(current_value='7.0', target_value='3.7', similarity=0.263), ValueMatch(current_value='8.0', target_value='8.5', similarity=0.263), ValueMatch(current_value='0.8', target_value='8.5', similarity=0.263), ValueMatch(current_value='1.7', target_value='3.7', similarity=0.262), ValueMatch(current_value='4.7', target_value='3.7', similarity=0.257)]\n",
+ "source: Tumor_Size_cm target: tumor_width_measurement score: 16.862000000000002\n",
+ "value_matches: [ValueMatch(current_value='3.5', target_value='3.5', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='1.5', target_value='1.5', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='0.8', target_value='0.8', similarity=1.0), ValueMatch(current_value='3.0', target_value='3.0', similarity=1.0), ValueMatch(current_value='4.2', target_value='4.2', similarity=1.0), ValueMatch(current_value='4.7', target_value='4.7', similarity=1.0), ValueMatch(current_value='13.5', target_value='3.5', similarity=0.61), ValueMatch(current_value='6.5', target_value='5.6', similarity=0.437), ValueMatch(current_value='8.0', target_value='0.8', similarity=0.429), ValueMatch(current_value='5.5', target_value='3.5', similarity=0.334), ValueMatch(current_value='2.2', target_value='1.2', similarity=0.329), ValueMatch(current_value='6.3', target_value='6.0', similarity=0.297), ValueMatch(current_value='2.6', target_value='6.0', similarity=0.297), ValueMatch(current_value='9.5', target_value='9.1', similarity=0.282), ValueMatch(current_value='9.0', target_value='9.1', similarity=0.281), ValueMatch(current_value='2.9', target_value='9.1', similarity=0.281), ValueMatch(current_value='0.9', target_value='9.1', similarity=0.281), ValueMatch(current_value='3.9', target_value='9.1', similarity=0.281), ValueMatch(current_value='2.7', target_value='4.7', similarity=0.27), ValueMatch(current_value='3.7', target_value='4.7', similarity=0.27), ValueMatch(current_value='7.0', target_value='4.7', similarity=0.27), ValueMatch(current_value='1.7', target_value='4.7', similarity=0.269), ValueMatch(current_value='4.5', target_value='4.2', similarity=0.25), ValueMatch(current_value='8.5', target_value='0.8', similarity=0.25)]\n",
+ "source: Tumor_Size_cm target: tumor_depth_measurement score: 13.718000000000002\n",
+ "value_matches: [ValueMatch(current_value='5.2', target_value='5.2', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='1.4', target_value='1.4', similarity=1.0), ValueMatch(current_value='3.7', target_value='3.7', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='1.8', target_value='1.8', similarity=1.0), ValueMatch(current_value='7.0', target_value='7.0', similarity=1.0), ValueMatch(current_value='2.2', target_value='2.2', similarity=1.0), ValueMatch(current_value='0.9', target_value='0.9', similarity=1.0), ValueMatch(current_value='9.0', target_value='0.9', similarity=0.452), ValueMatch(current_value='9.5', target_value='5.9', similarity=0.428), ValueMatch(current_value='8.0', target_value='8.8', similarity=0.414), ValueMatch(current_value='3.8', target_value='8.8', similarity=0.414), ValueMatch(current_value='0.8', target_value='8.8', similarity=0.414), ValueMatch(current_value='8.5', target_value='8.8', similarity=0.414), ValueMatch(current_value='3.2', target_value='2.2', similarity=0.337), ValueMatch(current_value='2.3', target_value='2.2', similarity=0.337), ValueMatch(current_value='1.2', target_value='2.2', similarity=0.334), ValueMatch(current_value='3.5', target_value='3.3', similarity=0.331), ValueMatch(current_value='5.5', target_value='5.2', similarity=0.329), ValueMatch(current_value='1.3', target_value='3.3', similarity=0.327), ValueMatch(current_value='4.2', target_value='2.2', similarity=0.326), ValueMatch(current_value='2.7', target_value='2.2', similarity=0.323), ValueMatch(current_value='2.9', target_value='2.2', similarity=0.32), ValueMatch(current_value='2.6', target_value='2.2', similarity=0.316), ValueMatch(current_value='3.0', target_value='3.3', similarity=0.312), ValueMatch(current_value='4.3', target_value='3.3', similarity=0.301), ValueMatch(current_value='6.3', target_value='6.0', similarity=0.297), 
ValueMatch(current_value='6.5', target_value='6.0', similarity=0.297), ValueMatch(current_value='3.9', target_value='3.3', similarity=0.296), ValueMatch(current_value='1.7', target_value='7.0', similarity=0.26), ValueMatch(current_value='4.7', target_value='7.0', similarity=0.255)]\n",
+ "source: Tumor_Size_cm target: size_extraocular_nodule score: 16.834\n",
+ "value_matches: [ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='2.7', target_value='2.72', similarity=0.683), ValueMatch(current_value='3.9', target_value='2.39', similarity=0.654), ValueMatch(current_value='2.2', target_value='2.28', similarity=0.652), ValueMatch(current_value='8.5', target_value='1.85', similarity=0.648), ValueMatch(current_value='1.8', target_value='1.85', similarity=0.648), ValueMatch(current_value='6.3', target_value='1.63', similarity=0.639), ValueMatch(current_value='9.5', target_value='1.95', similarity=0.632), ValueMatch(current_value='2.6', target_value='2.61', similarity=0.631), ValueMatch(current_value='1.5', target_value='1.52', similarity=0.63), ValueMatch(current_value='1.7', target_value='2.17', similarity=0.621), ValueMatch(current_value='3.0', target_value='3.05', similarity=0.618), ValueMatch(current_value='5.2', target_value='1.52', similarity=0.618), ValueMatch(current_value='2.9', target_value='2.94', similarity=0.613), ValueMatch(current_value='2.3', target_value='2.39', similarity=0.605), ValueMatch(current_value='4.7', target_value='1.74', similarity=0.321), ValueMatch(current_value='5.5', target_value='2.5', similarity=0.317), ValueMatch(current_value='6.0', target_value='2.06', similarity=0.289), ValueMatch(current_value='3.8', target_value='2.83', similarity=0.277), ValueMatch(current_value='1.4', target_value='1.74', similarity=0.269), ValueMatch(current_value='4.2', target_value='2.94', similarity=0.252)]\n",
+ "source: Tumor_Size_cm target: body_surface_area score: 11.617\n",
+ "value_matches: [ValueMatch(current_value='3.5', target_value='3.5', similarity=1.0), ValueMatch(current_value='4.5', target_value='4.5', similarity=1.0), ValueMatch(current_value='4.0', target_value='4.0', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='5.2', target_value='5.2', similarity=1.0), ValueMatch(current_value='0.8', target_value='0.8', similarity=1.0), ValueMatch(current_value='1.5', target_value='1.5', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='13.5', target_value='3.5', similarity=0.608), ValueMatch(current_value='3.7', target_value='7.3', similarity=0.442), ValueMatch(current_value='8.0', target_value='0.8', similarity=0.429), ValueMatch(current_value='1.8', target_value='8.1', similarity=0.422), ValueMatch(current_value='4.2', target_value='2.4', similarity=0.4), ValueMatch(current_value='1.3', target_value='3.1', similarity=0.394), ValueMatch(current_value='2.5', target_value='5.2', similarity=0.367), ValueMatch(current_value='5.0', target_value='0.5', similarity=0.339), ValueMatch(current_value='6.5', target_value='6.0', similarity=0.324), ValueMatch(current_value='2.2', target_value='5.2', similarity=0.323), ValueMatch(current_value='2.6', target_value='6.0', similarity=0.322), ValueMatch(current_value='5.5', target_value='3.5', similarity=0.322), ValueMatch(current_value='6.3', target_value='6.0', similarity=0.321), ValueMatch(current_value='7.0', target_value='7.3', similarity=0.284), ValueMatch(current_value='2.7', target_value='7.3', similarity=0.283), ValueMatch(current_value='1.7', target_value='7.3', similarity=0.281), ValueMatch(current_value='4.7', target_value='7.3', similarity=0.274), ValueMatch(current_value='8.5', target_value='0.8', similarity=0.268), ValueMatch(current_value='3.8', target_value='0.8', similarity=0.266), ValueMatch(current_value='4.3', target_value='4.5', similarity=0.251), 
ValueMatch(current_value='1.4', target_value='4.5', similarity=0.25)]\n",
+ "source: Tumor_Size_cm target: tumor_thickness score: 15.170000000000002\n",
+ "value_matches: [ValueMatch(current_value='6.5', target_value='6.5', similarity=1.0), ValueMatch(current_value='1.5', target_value='1.5', similarity=1.0), ValueMatch(current_value='9.5', target_value='9.5', similarity=1.0), ValueMatch(current_value='4.2', target_value='4.2', similarity=1.0), ValueMatch(current_value='1.0', target_value='10.0', similarity=0.691), ValueMatch(current_value='11.0', target_value='10.0', similarity=0.458), ValueMatch(current_value='6.3', target_value='3.6', similarity=0.447), ValueMatch(current_value='3.7', target_value='7.3', similarity=0.422), ValueMatch(current_value='2.9', target_value='9.2', similarity=0.405), ValueMatch(current_value='8.5', target_value='5.8', similarity=0.4), ValueMatch(current_value='1.2', target_value='2.1', similarity=0.371), ValueMatch(current_value='5.5', target_value='1.5', similarity=0.317), ValueMatch(current_value='2.2', target_value='4.2', similarity=0.316), ValueMatch(current_value='2.6', target_value='6.5', similarity=0.299), ValueMatch(current_value='6.0', target_value='6.5', similarity=0.296), ValueMatch(current_value='4.5', target_value='4.2', similarity=0.264), ValueMatch(current_value='4.3', target_value='4.2', similarity=0.262), ValueMatch(current_value='1.4', target_value='4.2', similarity=0.262), ValueMatch(current_value='4.0', target_value='4.2', similarity=0.26), ValueMatch(current_value='13.5', target_value='1.5', similarity=0.259), ValueMatch(current_value='4.7', target_value='4.2', similarity=0.253)]\n",
+ "source: Tumor_Size_cm target: rin score: 9.982000000000001\n",
+ "value_matches: [ValueMatch(current_value='4.5', target_value='4.5', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='6.3', target_value='6.3', similarity=1.0), ValueMatch(current_value='7.0', target_value='7.0', similarity=1.0), ValueMatch(current_value='1.5', target_value='11.5', similarity=0.684), ValueMatch(current_value='1.0', target_value='10.1', similarity=0.672), ValueMatch(current_value='2.7', target_value='12.7', similarity=0.667), ValueMatch(current_value='3.9', target_value='13.9', similarity=0.665), ValueMatch(current_value='1.3', target_value='13.9', similarity=0.592), ValueMatch(current_value='11.0', target_value='10.1', similarity=0.514), ValueMatch(current_value='8.5', target_value='5.8', similarity=0.418), ValueMatch(current_value='4.2', target_value='2.4', similarity=0.405), ValueMatch(current_value='13.5', target_value='13.9', similarity=0.361), ValueMatch(current_value='5.0', target_value='0.5', similarity=0.352), ValueMatch(current_value='2.2', target_value='1.2', similarity=0.35), ValueMatch(current_value='6.5', target_value='6.3', similarity=0.322), ValueMatch(current_value='2.6', target_value='6.3', similarity=0.32), ValueMatch(current_value='6.0', target_value='6.3', similarity=0.32), ValueMatch(current_value='5.5', target_value='4.5', similarity=0.297), ValueMatch(current_value='1.7', target_value='12.7', similarity=0.292), ValueMatch(current_value='3.7', target_value='7.0', similarity=0.28), ValueMatch(current_value='4.7', target_value='7.0', similarity=0.271), ValueMatch(current_value='1.8', target_value='5.8', similarity=0.269), ValueMatch(current_value='3.8', target_value='5.8', similarity=0.268), ValueMatch(current_value='8.0', target_value='5.8', similarity=0.267), ValueMatch(current_value='0.8', target_value='5.8', similarity=0.267), ValueMatch(current_value='9.5', target_value='9.4', similarity=0.259), ValueMatch(current_value='0.9', target_value='9.4', 
similarity=0.257), ValueMatch(current_value='2.9', target_value='9.4', similarity=0.257), ValueMatch(current_value='9.0', target_value='9.4', similarity=0.257), ValueMatch(current_value='1.4', target_value='4.5', similarity=0.253), ValueMatch(current_value='4.3', target_value='4.5', similarity=0.252), ValueMatch(current_value='4.0', target_value='4.5', similarity=0.251)]\n",
+ "source: Tumor_Size_cm target: greatest_tumor_dimension score: 14.639\n",
+ "value_matches: [ValueMatch(current_value='3.5', target_value='3.5', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='0.8', target_value='0.8', similarity=1.0), ValueMatch(current_value='4.7', target_value='4.7', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='2.6', target_value='12.6', similarity=0.672), ValueMatch(current_value='3.8', target_value='13.8', similarity=0.662), ValueMatch(current_value='11.0', target_value='11.1', similarity=0.634), ValueMatch(current_value='5.0', target_value='15.0', similarity=0.629), ValueMatch(current_value='13.5', target_value='3.5', similarity=0.626), ValueMatch(current_value='1.5', target_value='15.0', similarity=0.626), ValueMatch(current_value='1.3', target_value='13.8', similarity=0.597), ValueMatch(current_value='1.0', target_value='10.5', similarity=0.593), ValueMatch(current_value='8.0', target_value='0.8', similarity=0.434), ValueMatch(current_value='2.7', target_value='7.2', similarity=0.428), ValueMatch(current_value='1.8', target_value='8.8', similarity=0.4), ValueMatch(current_value='8.5', target_value='8.8', similarity=0.398), ValueMatch(current_value='2.2', target_value='1.2', similarity=0.35), ValueMatch(current_value='5.5', target_value='3.5', similarity=0.334), ValueMatch(current_value='6.5', target_value='6.0', similarity=0.3), ValueMatch(current_value='6.3', target_value='6.0', similarity=0.299), ValueMatch(current_value='9.5', target_value='9.4', similarity=0.273), ValueMatch(current_value='9.0', target_value='9.4', similarity=0.273), ValueMatch(current_value='0.9', target_value='9.4', similarity=0.273), ValueMatch(current_value='2.9', target_value='9.4', similarity=0.272), ValueMatch(current_value='3.9', target_value='9.4', similarity=0.272), ValueMatch(current_value='1.7', target_value='4.7', similarity=0.27), ValueMatch(current_value='7.0', target_value='4.7', similarity=0.269), 
ValueMatch(current_value='3.7', target_value='4.7', similarity=0.267), ValueMatch(current_value='1.4', target_value='4.7', similarity=0.251), ValueMatch(current_value='4.5', target_value='4.7', similarity=0.25), ValueMatch(current_value='4.0', target_value='4.7', similarity=0.25)]\n",
+ "source: Tumor_Size_cm target: tumor_largest_dimension_diameter score: 15.901999999999996\n",
+ "value_matches: [ValueMatch(current_value='2.9', target_value='2.9', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='1.4', target_value='1.4', similarity=1.0), ValueMatch(current_value='1.7', target_value='1.7', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='2.2', target_value='2.2', similarity=1.0), ValueMatch(current_value='4.7', target_value='4.7', similarity=1.0), ValueMatch(current_value='3.8', target_value='3.8', similarity=1.0), ValueMatch(current_value='0.9', target_value='0.9', similarity=1.0), ValueMatch(current_value='9.0', target_value='0.9', similarity=0.447), ValueMatch(current_value='7.0', target_value='0.7', similarity=0.411), ValueMatch(current_value='5.2', target_value='2.5', similarity=0.387), ValueMatch(current_value='4.0', target_value='4.4', similarity=0.381), ValueMatch(current_value='4.2', target_value='4.4', similarity=0.379), ValueMatch(current_value='4.5', target_value='4.4', similarity=0.379), ValueMatch(current_value='4.3', target_value='4.4', similarity=0.377), ValueMatch(current_value='1.5', target_value='5.1', similarity=0.371), ValueMatch(current_value='5.0', target_value='0.5', similarity=0.358), ValueMatch(current_value='3.5', target_value='3.3', similarity=0.343), ValueMatch(current_value='1.3', target_value='3.3', similarity=0.341), ValueMatch(current_value='5.5', target_value='2.5', similarity=0.331), ValueMatch(current_value='3.0', target_value='3.3', similarity=0.325), ValueMatch(current_value='3.2', target_value='2.2', similarity=0.324), ValueMatch(current_value='2.3', target_value='2.2', similarity=0.324), ValueMatch(current_value='6.5', target_value='6.0', similarity=0.32), ValueMatch(current_value='2.6', target_value='6.0', similarity=0.32), ValueMatch(current_value='6.3', target_value='6.0', similarity=0.318), 
ValueMatch(current_value='2.7', target_value='2.2', similarity=0.312), ValueMatch(current_value='3.7', target_value='3.3', similarity=0.31), ValueMatch(current_value='3.9', target_value='3.3', similarity=0.307), ValueMatch(current_value='0.8', target_value='3.8', similarity=0.297), ValueMatch(current_value='8.0', target_value='3.8', similarity=0.297), ValueMatch(current_value='8.5', target_value='3.8', similarity=0.296), ValueMatch(current_value='1.8', target_value='3.8', similarity=0.294), ValueMatch(current_value='9.5', target_value='0.9', similarity=0.28)]\n",
+ "source: Tumor_Size_cm target: shortest_dimension score: 18.829\n",
+ "value_matches: [ValueMatch(current_value='4.5', target_value='4.5', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='4.0', target_value='4.0', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='5.5', target_value='5.5', similarity=1.0), ValueMatch(current_value='1.8', target_value='1.8', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='3.2', target_value='3.2', similarity=1.0), ValueMatch(current_value='0.8', target_value='0.8', similarity=1.0), ValueMatch(current_value='6.3', target_value='3.6', similarity=0.442), ValueMatch(current_value='8.0', target_value='0.8', similarity=0.437), ValueMatch(current_value='2.7', target_value='7.2', similarity=0.437), ValueMatch(current_value='2.3', target_value='3.2', similarity=0.38), ValueMatch(current_value='5.2', target_value='2.5', similarity=0.367), ValueMatch(current_value='1.5', target_value='5.1', similarity=0.367), ValueMatch(current_value='3.0', target_value='0.3', similarity=0.346), ValueMatch(current_value='5.0', target_value='0.5', similarity=0.333), ValueMatch(current_value='2.2', target_value='2.5', similarity=0.323), ValueMatch(current_value='3.5', target_value='5.5', similarity=0.323), ValueMatch(current_value='6.5', target_value='6.0', similarity=0.304), ValueMatch(current_value='2.6', target_value='6.0', similarity=0.303), ValueMatch(current_value='8.5', target_value='5.5', similarity=0.288), ValueMatch(current_value='7.0', target_value='7.2', similarity=0.287), ValueMatch(current_value='3.7', target_value='7.2', similarity=0.283), ValueMatch(current_value='3.8', target_value='0.8', similarity=0.283), ValueMatch(current_value='9.5', target_value='5.5', similarity=0.28), ValueMatch(current_value='1.7', target_value='7.2', similarity=0.28), ValueMatch(current_value='4.7', target_value='7.2', similarity=0.273), 
ValueMatch(current_value='4.2', target_value='4.0', similarity=0.268), ValueMatch(current_value='4.3', target_value='4.0', similarity=0.267), ValueMatch(current_value='1.4', target_value='4.0', similarity=0.264), ValueMatch(current_value='11.0', target_value='1.2', similarity=0.26)]\n",
+ "source: Tumor_Size_cm target: analyte_quantity score: 16.395000000000003\n",
+ "value_matches: [ValueMatch(current_value='3.5', target_value='3.5', similarity=1.0), ValueMatch(current_value='9.0', target_value='9.0', similarity=1.0), ValueMatch(current_value='0.8', target_value='0.8', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='4.3', target_value='14.3', similarity=0.648), ValueMatch(current_value='1.4', target_value='14.3', similarity=0.641), ValueMatch(current_value='13.5', target_value='3.5', similarity=0.615), ValueMatch(current_value='1.5', target_value='15.7', similarity=0.598), ValueMatch(current_value='1.0', target_value='10.5', similarity=0.596), ValueMatch(current_value='11.0', target_value='11.2', similarity=0.486), ValueMatch(current_value='0.9', target_value='9.0', similarity=0.452), ValueMatch(current_value='8.0', target_value='0.8', similarity=0.44), ValueMatch(current_value='3.8', target_value='8.3', similarity=0.416), ValueMatch(current_value='4.2', target_value='2.4', similarity=0.405), ValueMatch(current_value='2.2', target_value='1.2', similarity=0.362), ValueMatch(current_value='5.5', target_value='3.5', similarity=0.334), ValueMatch(current_value='9.5', target_value='9.0', similarity=0.28), ValueMatch(current_value='3.9', target_value='9.0', similarity=0.279), ValueMatch(current_value='2.9', target_value='9.0', similarity=0.279), ValueMatch(current_value='1.8', target_value='12.8', similarity=0.278), ValueMatch(current_value='6.5', target_value='7.6', similarity=0.273), ValueMatch(current_value='6.0', target_value='7.6', similarity=0.272), ValueMatch(current_value='6.3', target_value='7.6', similarity=0.272), ValueMatch(current_value='2.6', target_value='7.6', similarity=0.272), ValueMatch(current_value='1.7', target_value='15.7', similarity=0.271), ValueMatch(current_value='8.5', target_value='0.8', similarity=0.264)]\n",
+ "source: Tumor_Size_cm target: tumor_depth score: 12.732999999999999\n",
+ "value_matches: [ValueMatch(current_value='5.2', target_value='5.2', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='1.5', target_value='1.5', similarity=1.0), ValueMatch(current_value='3.7', target_value='3.7', similarity=1.0), ValueMatch(current_value='6.3', target_value='6.3', similarity=1.0), ValueMatch(current_value='2.2', target_value='2.2', similarity=1.0), ValueMatch(current_value='3.9', target_value='3.9', similarity=1.0), ValueMatch(current_value='4.7', target_value='7.4', similarity=0.451), ValueMatch(current_value='6.5', target_value='5.6', similarity=0.423), ValueMatch(current_value='1.7', target_value='7.1', similarity=0.422), ValueMatch(current_value='1.4', target_value='4.1', similarity=0.411), ValueMatch(current_value='3.2', target_value='2.2', similarity=0.339), ValueMatch(current_value='2.3', target_value='2.2', similarity=0.339), ValueMatch(current_value='1.2', target_value='2.2', similarity=0.336), ValueMatch(current_value='4.2', target_value='2.2', similarity=0.326), ValueMatch(current_value='2.9', target_value='2.2', similarity=0.323), ValueMatch(current_value='2.7', target_value='2.2', similarity=0.323), ValueMatch(current_value='3.5', target_value='3.3', similarity=0.321), ValueMatch(current_value='2.6', target_value='2.2', similarity=0.32), ValueMatch(current_value='1.3', target_value='3.3', similarity=0.317), ValueMatch(current_value='5.5', target_value='1.5', similarity=0.317), ValueMatch(current_value='3.0', target_value='3.3', similarity=0.295), ValueMatch(current_value='4.3', target_value='3.3', similarity=0.29), ValueMatch(current_value='3.8', target_value='3.3', similarity=0.281), ValueMatch(current_value='6.0', target_value='6.3', similarity=0.276), ValueMatch(current_value='8.5', target_value='6.8', similarity=0.27), ValueMatch(current_value='1.8', target_value='6.8', similarity=0.268), ValueMatch(current_value='9.5', target_value='3.9', similarity=0.266), 
ValueMatch(current_value='0.8', target_value='6.8', similarity=0.265), ValueMatch(current_value='8.0', target_value='6.8', similarity=0.265), ValueMatch(current_value='7.0', target_value='3.7', similarity=0.26), ValueMatch(current_value='13.5', target_value='1.5', similarity=0.26), ValueMatch(current_value='9.0', target_value='3.9', similarity=0.26), ValueMatch(current_value='0.9', target_value='3.9', similarity=0.26)]\n",
+ "source: Tumor_Size_cm target: imaging_suv score: 15.484000000000002\n",
+ "value_matches: [ValueMatch(current_value='0.8', target_value='0.8', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='1.3', target_value='11.3', similarity=0.689), ValueMatch(current_value='1.0', target_value='10.1', similarity=0.673), ValueMatch(current_value='2.6', target_value='12.6', similarity=0.667), ValueMatch(current_value='4.2', target_value='14.2', similarity=0.648), ValueMatch(current_value='1.4', target_value='14.2', similarity=0.641), ValueMatch(current_value='1.5', target_value='15.8', similarity=0.602), ValueMatch(current_value='11.0', target_value='10.1', similarity=0.516), ValueMatch(current_value='8.0', target_value='0.8', similarity=0.435), ValueMatch(current_value='6.5', target_value='5.6', similarity=0.428), ValueMatch(current_value='13.5', target_value='11.3', similarity=0.422), ValueMatch(current_value='1.7', target_value='7.1', similarity=0.418), ValueMatch(current_value='5.2', target_value='2.5', similarity=0.387), ValueMatch(current_value='2.2', target_value='1.2', similarity=0.35), ValueMatch(current_value='3.5', target_value='3.3', similarity=0.343), ValueMatch(current_value='5.5', target_value='2.5', similarity=0.331), ValueMatch(current_value='2.3', target_value='3.3', similarity=0.323), ValueMatch(current_value='3.2', target_value='3.3', similarity=0.323), ValueMatch(current_value='3.0', target_value='3.3', similarity=0.322), ValueMatch(current_value='4.3', target_value='3.3', similarity=0.313), ValueMatch(current_value='3.8', target_value='3.3', similarity=0.313), ValueMatch(current_value='3.9', target_value='3.3', similarity=0.307), ValueMatch(current_value='6.3', target_value='3.3', similarity=0.307), ValueMatch(current_value='3.7', target_value='3.3', similarity=0.307), ValueMatch(current_value='8.5', target_value='15.8', similarity=0.271), ValueMatch(current_value='2.7', target_value='7.1', 
similarity=0.267), ValueMatch(current_value='7.0', target_value='7.1', similarity=0.265), ValueMatch(current_value='6.0', target_value='5.6', similarity=0.263), ValueMatch(current_value='1.8', target_value='15.8', similarity=0.261), ValueMatch(current_value='4.7', target_value='7.1', similarity=0.259), ValueMatch(current_value='2.9', target_value='8.9', similarity=0.257), ValueMatch(current_value='9.5', target_value='8.9', similarity=0.257), ValueMatch(current_value='9.0', target_value='8.9', similarity=0.255), ValueMatch(current_value='0.9', target_value='8.9', similarity=0.255)]\n",
+ "source: Tumor_Size_cm target: tumor_length_measurement score: 15.675000000000002\n",
+ "value_matches: [ValueMatch(current_value='6.0', target_value='600', similarity=0.729), ValueMatch(current_value='7.0', target_value='700', similarity=0.725), ValueMatch(current_value='8.0', target_value='800', similarity=0.725), ValueMatch(current_value='4.0', target_value='400', similarity=0.722), ValueMatch(current_value='5.5', target_value='550', similarity=0.721), ValueMatch(current_value='3.0', target_value='300', similarity=0.709), ValueMatch(current_value='6.5', target_value='650', similarity=0.708), ValueMatch(current_value='8.5', target_value='850', similarity=0.703), ValueMatch(current_value='4.5', target_value='450', similarity=0.699), ValueMatch(current_value='1.5', target_value='150', similarity=0.691), ValueMatch(current_value='2.5', target_value='250', similarity=0.685), ValueMatch(current_value='3.5', target_value='350', similarity=0.666), ValueMatch(current_value='5.0', target_value='500', similarity=0.638), ValueMatch(current_value='13.5', target_value='350', similarity=0.398), ValueMatch(current_value='0.8', target_value='800', similarity=0.333), ValueMatch(current_value='1.0', target_value='150', similarity=0.265), ValueMatch(current_value='5.2', target_value='250', similarity=0.253), ValueMatch(current_value='2.2', target_value='250', similarity=0.251)]\n",
+ "source: Tumor_Size_cm target: average_insert_size score: 10.621\n",
+ "value_matches: [ValueMatch(current_value='3.5', target_value='3.5', similarity=1.0), ValueMatch(current_value='4.5', target_value='4.5', similarity=1.0), ValueMatch(current_value='5.0', target_value='5.0', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='4.0', target_value='4.0', similarity=1.0), ValueMatch(current_value='5.5', target_value='5.5', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='7.0', target_value='7.0', similarity=1.0), ValueMatch(current_value='1.0', target_value='1.0', similarity=1.0), ValueMatch(current_value='1.5', target_value='1.5', similarity=1.0), ValueMatch(current_value='8.0', target_value='8.0', similarity=1.0), ValueMatch(current_value='3.0', target_value='3.0', similarity=1.0), ValueMatch(current_value='6.5', target_value='6.5', similarity=1.0), ValueMatch(current_value='11.0', target_value='1.0', similarity=0.69), ValueMatch(current_value='13.5', target_value='3.5', similarity=0.602), ValueMatch(current_value='0.8', target_value='8.0', similarity=0.444), ValueMatch(current_value='5.2', target_value='2.5', similarity=0.374), ValueMatch(current_value='2.2', target_value='2.5', similarity=0.365), ValueMatch(current_value='8.5', target_value='8.0', similarity=0.309), ValueMatch(current_value='3.8', target_value='8.0', similarity=0.302), ValueMatch(current_value='6.3', target_value='6.5', similarity=0.302), ValueMatch(current_value='2.6', target_value='6.5', similarity=0.301), ValueMatch(current_value='1.8', target_value='8.0', similarity=0.299), ValueMatch(current_value='3.7', target_value='7.0', similarity=0.284), ValueMatch(current_value='2.7', target_value='7.0', similarity=0.282), ValueMatch(current_value='1.7', target_value='7.0', similarity=0.281), ValueMatch(current_value='4.7', target_value='7.0', similarity=0.274), ValueMatch(current_value='4.3', target_value='4.5', similarity=0.268), 
ValueMatch(current_value='4.2', target_value='4.5', similarity=0.266), ValueMatch(current_value='1.4', target_value='4.5', similarity=0.264), ValueMatch(current_value='9.5', target_value='5.5', similarity=0.253), ValueMatch(current_value='1.3', target_value='1.0', similarity=0.252)]\n",
+ "source: Tumor_Size_cm target: circumferential_resection_margin score: 19.412\n",
+ "value_matches: [ValueMatch(current_value='5.5', target_value='v5.5.5', similarity=0.801), ValueMatch(current_value='6.0', target_value='v6.0.0', similarity=0.584), ValueMatch(current_value='4.0', target_value='v4.0.0', similarity=0.576), ValueMatch(current_value='8.0', target_value='v8.0.0', similarity=0.575), ValueMatch(current_value='1.0', target_value='v1.0.0', similarity=0.54), ValueMatch(current_value='7.0', target_value='v7.0.3', similarity=0.505), ValueMatch(current_value='2.3', target_value='v1.2.3', similarity=0.502), ValueMatch(current_value='1.2', target_value='v1.2.3', similarity=0.484), ValueMatch(current_value='5.0', target_value='v5.0.1', similarity=0.481), ValueMatch(current_value='3.0', target_value='v3.0.5', similarity=0.479), ValueMatch(current_value='11.0', target_value='v1.0.0', similarity=0.348), ValueMatch(current_value='6.3', target_value='v6.6.6', similarity=0.333), ValueMatch(current_value='6.5', target_value='v6.6.6', similarity=0.332), ValueMatch(current_value='2.6', target_value='v6.6.6', similarity=0.332), ValueMatch(current_value='3.7', target_value='v7.7.7', similarity=0.314), ValueMatch(current_value='1.7', target_value='v7.7.7', similarity=0.313), ValueMatch(current_value='2.7', target_value='v7.7.7', similarity=0.313), ValueMatch(current_value='4.7', target_value='v7.7.7', similarity=0.301), ValueMatch(current_value='4.3', target_value='v4.4.4', similarity=0.297), ValueMatch(current_value='4.2', target_value='v4.4.4', similarity=0.296), ValueMatch(current_value='4.5', target_value='v4.4.4', similarity=0.296), ValueMatch(current_value='1.4', target_value='v4.4.4', similarity=0.296), ValueMatch(current_value='0.8', target_value='v8.0.0', similarity=0.288)]\n",
+ "source: Tumor_Size_cm target: schema_version score: 9.585999999999997\n",
+ "value_matches: [ValueMatch(current_value='6.5', target_value='6.5', similarity=1.0), ValueMatch(current_value='1.5', target_value='1.5', similarity=1.0), ValueMatch(current_value='1.0', target_value='1.0', similarity=1.0), ValueMatch(current_value='9.5', target_value='9.5', similarity=1.0), ValueMatch(current_value='4.2', target_value='4.2', similarity=1.0), ValueMatch(current_value='11.0', target_value='1.0', similarity=0.677), ValueMatch(current_value='6.3', target_value='3.6', similarity=0.447), ValueMatch(current_value='3.7', target_value='7.3', similarity=0.434), ValueMatch(current_value='8.5', target_value='5.8', similarity=0.406), ValueMatch(current_value='1.2', target_value='2.1', similarity=0.371), ValueMatch(current_value='5.0', target_value='0.5', similarity=0.345), ValueMatch(current_value='2.2', target_value='4.2', similarity=0.316), ValueMatch(current_value='5.5', target_value='1.5', similarity=0.306), ValueMatch(current_value='6.0', target_value='6.5', similarity=0.301), ValueMatch(current_value='2.6', target_value='6.5', similarity=0.3), ValueMatch(current_value='9.0', target_value='9.5', similarity=0.283), ValueMatch(current_value='0.9', target_value='9.5', similarity=0.283), ValueMatch(current_value='2.9', target_value='9.5', similarity=0.282), ValueMatch(current_value='3.9', target_value='9.5', similarity=0.28), ValueMatch(current_value='4.5', target_value='4.2', similarity=0.265), ValueMatch(current_value='7.0', target_value='7.3', similarity=0.264), ValueMatch(current_value='4.0', target_value='4.2', similarity=0.264), ValueMatch(current_value='2.7', target_value='7.3', similarity=0.263), ValueMatch(current_value='1.4', target_value='4.2', similarity=0.262), ValueMatch(current_value='1.7', target_value='7.3', similarity=0.262), ValueMatch(current_value='4.3', target_value='4.2', similarity=0.262), ValueMatch(current_value='13.5', target_value='1.5', similarity=0.254), ValueMatch(current_value='4.7', target_value='7.3', similarity=0.253), 
ValueMatch(current_value='0.8', target_value='5.8', similarity=0.252), ValueMatch(current_value='8.0', target_value='5.8', similarity=0.252), ValueMatch(current_value='3.8', target_value='5.8', similarity=0.25), ValueMatch(current_value='1.8', target_value='5.8', similarity=0.25)]\n",
+ "source: Tumor_Size_cm target: rna_integrity_number score: 13.383999999999999\n",
+ "value_matches: [ValueMatch(current_value='6.0', target_value='6.0.0', similarity=0.743), ValueMatch(current_value='8.0', target_value='8.0.0', similarity=0.743), ValueMatch(current_value='7.0', target_value='7.0.0', similarity=0.74), ValueMatch(current_value='4.0', target_value='4.0.0', similarity=0.732), ValueMatch(current_value='5.0', target_value='5.0.0', similarity=0.723), ValueMatch(current_value='3.0', target_value='3.0.0', similarity=0.723), ValueMatch(current_value='1.0', target_value='1.0.0', similarity=0.715), ValueMatch(current_value='1.2', target_value='1.2.1', similarity=0.693), ValueMatch(current_value='4.2', target_value='4.2.0', similarity=0.647), ValueMatch(current_value='1.4', target_value='3.1.4', similarity=0.63), ValueMatch(current_value='2.5', target_value='2.5.3', similarity=0.628), ValueMatch(current_value='3.2', target_value='5.3.2', similarity=0.628), ValueMatch(current_value='4.5', target_value='7.4.5', similarity=0.61), ValueMatch(current_value='11.0', target_value='1.0.0', similarity=0.481), ValueMatch(current_value='0.8', target_value='8.0.0', similarity=0.372), ValueMatch(current_value='4.7', target_value='7.4.5', similarity=0.31), ValueMatch(current_value='4.3', target_value='3.1.4', similarity=0.257), ValueMatch(current_value='1.3', target_value='3.1.4', similarity=0.25)]\n",
+ "source: Tumor_Size_cm target: workflow_version score: 10.624999999999998\n",
+ "value_matches: [ValueMatch(current_value='5.0', target_value='5.0', similarity=1.0), ValueMatch(current_value='1.2', target_value='1.2', similarity=1.0), ValueMatch(current_value='3.8', target_value='3.8', similarity=1.0), ValueMatch(current_value='8.5', target_value='8.5', similarity=1.0), ValueMatch(current_value='6.3', target_value='6.3', similarity=1.0), ValueMatch(current_value='1.4', target_value='14.4', similarity=0.72), ValueMatch(current_value='1.8', target_value='18.5', similarity=0.653), ValueMatch(current_value='6.0', target_value='16.0', similarity=0.644), ValueMatch(current_value='11.0', target_value='11.2', similarity=0.481), ValueMatch(current_value='9.5', target_value='9.9', similarity=0.436), ValueMatch(current_value='9.0', target_value='9.9', similarity=0.436), ValueMatch(current_value='0.9', target_value='9.9', similarity=0.436), ValueMatch(current_value='2.9', target_value='9.9', similarity=0.436), ValueMatch(current_value='3.9', target_value='9.9', similarity=0.432), ValueMatch(current_value='1.7', target_value='7.1', similarity=0.418), ValueMatch(current_value='4.2', target_value='2.4', similarity=0.411), ValueMatch(current_value='2.2', target_value='1.2', similarity=0.35), ValueMatch(current_value='5.5', target_value='5.0', similarity=0.321), ValueMatch(current_value='4.5', target_value='14.4', similarity=0.299), ValueMatch(current_value='4.0', target_value='14.4', similarity=0.299), ValueMatch(current_value='6.5', target_value='6.3', similarity=0.297), ValueMatch(current_value='2.6', target_value='6.3', similarity=0.297), ValueMatch(current_value='4.3', target_value='14.4', similarity=0.296), ValueMatch(current_value='4.7', target_value='14.4', similarity=0.284), ValueMatch(current_value='7.0', target_value='20.7', similarity=0.278), ValueMatch(current_value='2.7', target_value='20.7', similarity=0.278), ValueMatch(current_value='3.7', target_value='7.1', similarity=0.265), ValueMatch(current_value='8.0', target_value='8.5', 
similarity=0.261), ValueMatch(current_value='0.8', target_value='8.5', similarity=0.261)]\n",
+ "source: Tumor_Size_cm target: tumor_burden score: 14.288999999999998\n",
+ "value_matches: [ValueMatch(current_value='5.0', target_value='5.0', similarity=1.0), ValueMatch(current_value='4.0', target_value='4.0', similarity=1.0), ValueMatch(current_value='6.0', target_value='6.0', similarity=1.0), ValueMatch(current_value='8.0', target_value='8.0', similarity=1.0), ValueMatch(current_value='3.0', target_value='3.0', similarity=1.0), ValueMatch(current_value='1.0', target_value='1.0', similarity=1.0), ValueMatch(current_value='2.5', target_value='2.5', similarity=1.0), ValueMatch(current_value='7.0', target_value='7.0', similarity=1.0), ValueMatch(current_value='9.0', target_value='9.0', similarity=1.0), ValueMatch(current_value='1.5', target_value='1.5', similarity=1.0), ValueMatch(current_value='11.0', target_value='1.0', similarity=0.663), ValueMatch(current_value='0.9', target_value='9.0', similarity=0.428), ValueMatch(current_value='0.8', target_value='8.0', similarity=0.428), ValueMatch(current_value='5.2', target_value='2.5', similarity=0.389), ValueMatch(current_value='2.2', target_value='2.5', similarity=0.36), ValueMatch(current_value='5.5', target_value='5.0', similarity=0.338), ValueMatch(current_value='6.5', target_value='6.0', similarity=0.332), ValueMatch(current_value='6.3', target_value='6.0', similarity=0.326), ValueMatch(current_value='2.6', target_value='6.0', similarity=0.326), ValueMatch(current_value='8.5', target_value='8.0', similarity=0.311), ValueMatch(current_value='9.5', target_value='9.0', similarity=0.311), ValueMatch(current_value='1.7', target_value='7.0', similarity=0.308), ValueMatch(current_value='1.8', target_value='8.0', similarity=0.308), ValueMatch(current_value='2.9', target_value='9.0', similarity=0.305), ValueMatch(current_value='2.7', target_value='7.0', similarity=0.305), ValueMatch(current_value='3.9', target_value='9.0', similarity=0.305), ValueMatch(current_value='3.7', target_value='7.0', similarity=0.305), ValueMatch(current_value='3.8', target_value='8.0', similarity=0.305), 
ValueMatch(current_value='4.7', target_value='7.0', similarity=0.294), ValueMatch(current_value='4.5', target_value='4.0', similarity=0.292), ValueMatch(current_value='1.4', target_value='4.0', similarity=0.29), ValueMatch(current_value='4.3', target_value='4.0', similarity=0.287), ValueMatch(current_value='4.2', target_value='4.0', similarity=0.287), ValueMatch(current_value='13.5', target_value='1.5', similarity=0.259)]\n",
+ "source: Tumor_Size_cm target: margin_distance score: 18.062\n",
+ "Top k reranked columns: [('Tumor_Size_cm', 'circumferential_resection_margin', 19.412), ('Tumor_Size_cm', 'shortest_dimension', 18.829), ('Tumor_Size_cm', 'margin_distance', 18.062), ('Tumor_Size_cm', 'tumor_width_measurement', 16.862000000000002), ('Tumor_Size_cm', 'size_extraocular_nodule', 16.834), ('Tumor_Size_cm', 'analyte_quantity', 16.395000000000003), ('Tumor_Size_cm', 'tumor_largest_dimension_diameter', 15.901999999999996), ('Tumor_Size_cm', 'tumor_length_measurement', 15.675000000000002), ('Tumor_Size_cm', 'imaging_suv', 15.484000000000002), ('Tumor_Size_cm', 'tumor_thickness', 15.170000000000002), ('Tumor_Size_cm', 'greatest_tumor_dimension', 14.639), ('Tumor_Size_cm', 'tumor_burden', 14.288999999999998), ('Tumor_Size_cm', 'tumor_depth_measurement', 13.718000000000002), ('Tumor_Size_cm', 'rna_integrity_number', 13.383999999999999), ('Tumor_Size_cm', 'tumor_depth', 12.732999999999999), ('Tumor_Size_cm', 'body_surface_area', 11.617), ('Tumor_Size_cm', 'workflow_version', 10.624999999999998), ('Tumor_Size_cm', 'average_insert_size', 10.621), ('Tumor_Size_cm', 'rin', 9.982000000000001), ('Tumor_Size_cm', 'schema_version', 9.585999999999997)]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Country | \n",
+ " race | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ajcc_pathologic_t | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Histologic_type | \n",
+ " primary_diagnosis | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " FIGO_stage | \n",
+ " figo_stage | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " BMI | \n",
+ " average_base_quality | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Age | \n",
+ " age_at_last_exposure | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Race | \n",
+ " race | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Ethnicity | \n",
+ " ethnicity | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Gender | \n",
+ " primary_site | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Tumor_Focality | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Tumor_Size_cm | \n",
+ " circumferential_resection_margin | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 Country race\n",
+ "1 Path_Stage_Primary_Tumor-pT ajcc_pathologic_t\n",
+ "2 Histologic_type primary_diagnosis\n",
+ "3 FIGO_stage figo_stage\n",
+ "4 BMI average_base_quality\n",
+ "5 Age age_at_last_exposure\n",
+ "6 Race race\n",
+ "7 Ethnicity ethnicity\n",
+ "8 Gender primary_site\n",
+ "9 Tumor_Focality tumor_focality\n",
+ "10 Tumor_Size_cm circumferential_resection_margin"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "column_mappings = bdi.match_schema(dataset[columns], target=\"gdc\", method=\"two_phase\")\n",
+ "column_mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generating a harmonized table\n",
+ "\n",
+ "After discovering a schema mapping, we can generate a new table (DataFrame) using the new column names from the GDC standard vocabulary.\n",
+ "\n",
+ "To do so using `bdi-kit`, we can use the function `materialize_mapping()` as follows. Note that the column headers have been renamed to the target schema."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " country_of_birth | \n",
+ " history_of_tumor_type | \n",
+ " figo_stage | \n",
+ " average_base_quality | \n",
+ " age_at_diagnosis | \n",
+ " race | \n",
+ " ethnicity | \n",
+ " gender | \n",
+ " tumor_focality | \n",
+ " tumor_width_measurement | \n",
+ " tumor_level_prostate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 38.88 | \n",
+ " 64.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 2.9 | \n",
+ " Anterior endometrium | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 39.76 | \n",
+ " 58.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ " Posterior endometrium | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 51.19 | \n",
+ " 50.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 4.5 | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 32.69 | \n",
+ " 75.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 29.40 | \n",
+ " 75.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 4.2 | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " II | \n",
+ " 35.42 | \n",
+ " 74.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 1.5 | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " United States | \n",
+ " Serous | \n",
+ " II | \n",
+ " 24.32 | \n",
+ " 85.0 | \n",
+ " Black or African American | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 3.8 | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " IA | \n",
+ " 34.06 | \n",
+ " 70.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 5.0 | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " country_of_birth history_of_tumor_type figo_stage average_base_quality \\\n",
+ "0 United States Endometrioid IA 38.88 \n",
+ "1 United States Endometrioid IA 39.76 \n",
+ "2 United States Endometrioid IA 51.19 \n",
+ "3 NaN Carcinosarcoma NaN NaN \n",
+ "4 United States Endometrioid IA 32.69 \n",
+ ".. ... ... ... ... \n",
+ "99 Ukraine Endometrioid IA 29.40 \n",
+ "100 Ukraine Endometrioid II 35.42 \n",
+ "101 United States Serous II 24.32 \n",
+ "102 Ukraine Serous IA 34.06 \n",
+ "103 Ukraine Serous NaN NaN \n",
+ "\n",
+ " age_at_diagnosis race ethnicity \\\n",
+ "0 64.0 White Not-Hispanic or Latino \n",
+ "1 58.0 White Not-Hispanic or Latino \n",
+ "2 50.0 White Not-Hispanic or Latino \n",
+ "3 NaN NaN NaN \n",
+ "4 75.0 White Not-Hispanic or Latino \n",
+ ".. ... ... ... \n",
+ "99 75.0 NaN NaN \n",
+ "100 74.0 NaN NaN \n",
+ "101 85.0 Black or African American Not-Hispanic or Latino \n",
+ "102 70.0 NaN NaN \n",
+ "103 NaN NaN NaN \n",
+ "\n",
+ " gender tumor_focality tumor_width_measurement tumor_level_prostate \n",
+ "0 Female Unifocal 2.9 Anterior endometrium \n",
+ "1 Female Unifocal 3.5 Posterior endometrium \n",
+ "2 Female Unifocal 4.5 Other, specify \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 Female Unifocal 3.5 Other, specify \n",
+ ".. ... ... ... ... \n",
+ "99 Female Unifocal 4.2 Other, specify \n",
+ "100 Female Unifocal 1.5 Other, specify \n",
+ "101 Female Unifocal 3.8 Other, specify \n",
+ "102 Female Unifocal 5.0 Other, specify \n",
+ "103 NaN NaN NaN NaN \n",
+ "\n",
+ "[104 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.materialize_mapping(dataset, column_mappings)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generating a harmonized table with value mappings\n",
+ "\n",
+ "`bdi-kit` can also help with translation of the values from the source table to the target standard format.\n",
+ "\n",
+ "To this end, `bdi-kit` provides the function `match_values()` that automatically creates value mappings for each string column.\n",
+ "The output of `match_values()` can be fed to `materialize_mapping()`, which materializes the final target table using both schema and value mappings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "value_mappings = bdi.match_values(dataset, column_mapping=column_mappings, target=\"gdc\", method=\"tfidf\")\n",
+ "bdi.materialize_mapping(dataset, value_mappings)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Verifying and Correcting Automatic Mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Verifying the schema mappings\n",
+ "\n",
+ "Sometimes the mappings generated automatically may be incorrect or you may want to verify them individually.\n",
+ "To verify the suggested column mappings, `bdi-kit` offers additional APIs to visualize the data and make any modifications when necessary. \n",
+ "\n",
+ "For this example, we will use the column `Histologic_type`. We can start by exploring the columns most similar to `Histologic_type`. \n",
+ "\n",
+ "For this, we can use the `top_matches()` function. Here, we notice that `primary_diagnosis` could be a potential target column.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "100%|██████████| 1/1 [00:00<00:00, 17.62it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 1 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 734/734 [00:55<00:00, 13.25it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 734 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Histologic_type | \n",
+ " disease_type | \n",
+ " 0.539168 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Histologic_type | \n",
+ " sample_type | \n",
+ " 0.530217 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Histologic_type | \n",
+ " roots | \n",
+ " 0.525866 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Histologic_type | \n",
+ " history_of_tumor_type | \n",
+ " 0.524959 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Histologic_type | \n",
+ " additional_pathology_findings | \n",
+ " 0.517125 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Histologic_type | \n",
+ " specimen_type | \n",
+ " 0.511611 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Histologic_type | \n",
+ " morphologic_architectural_pattern | \n",
+ " 0.482257 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Histologic_type | \n",
+ " histone_variant | \n",
+ " 0.478656 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Histologic_type | \n",
+ " viral_hepatitis_serologies | \n",
+ " 0.471346 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Histologic_type | \n",
+ " chromosome | \n",
+ " 0.471309 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Histologic_type | \n",
+ " analyte_type_id | \n",
+ " 0.450211 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Histologic_type | \n",
+ " composition | \n",
+ " 0.446170 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Histologic_type | \n",
+ " described_cases | \n",
+ " 0.445148 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Histologic_type | \n",
+ " pathology_details | \n",
+ " 0.434320 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Histologic_type | \n",
+ " antigen | \n",
+ " 0.432611 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Histologic_type | \n",
+ " pathology_reports | \n",
+ " 0.429676 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Histologic_type | \n",
+ " cog_rhabdomyosarcoma_risk_group | \n",
+ " 0.420320 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Histologic_type | \n",
+ " biospecimen_type | \n",
+ " 0.420141 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Histologic_type | \n",
+ " analyte_type | \n",
+ " 0.403565 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Histologic_type | \n",
+ " single_cell_library | \n",
+ " 0.403199 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " Histologic_type | \n",
+ " laboratory_test | \n",
+ " 0.399581 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Histologic_type | \n",
+ " slide_images | \n",
+ " 0.398477 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " Histologic_type | \n",
+ " tumor_descriptor | \n",
+ " 0.396314 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Histologic_type | \n",
+ " sample_type_id | \n",
+ " 0.391940 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Histologic_type | \n",
+ " relationship_primary_diagnosis | \n",
+ " 0.391895 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Histologic_type | \n",
+ " icd_10_code | \n",
+ " 0.388311 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Histologic_type | \n",
+ " stain_type | \n",
+ " 0.385822 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Histologic_type | \n",
+ " histone_family | \n",
+ " 0.385273 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Histologic_type | \n",
+ " pathogenicity | \n",
+ " 0.383606 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Histologic_type | \n",
+ " contiguous_organ_invaded | \n",
+ " 0.383036 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Histologic_type | \n",
+ " aids_risk_factors | \n",
+ " 0.382602 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Histologic_type | \n",
+ " icd_10 | \n",
+ " 0.375303 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Histologic_type | \n",
+ " biospecimen_anatomic_site | \n",
+ " 0.372609 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Histologic_type | \n",
+ " primary_diagnosis | \n",
+ " 0.371001 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Histologic_type | \n",
+ " dysplasia_type | \n",
+ " 0.369152 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " Histologic_type | \n",
+ " tissue_type | \n",
+ " 0.367687 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Histologic_type | \n",
+ " gene_symbol | \n",
+ " 0.366179 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " Histologic_type | \n",
+ " sarcomatoid_present | \n",
+ " 0.354912 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " Histologic_type | \n",
+ " papillary_renal_cell_type | \n",
+ " 0.351789 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " Histologic_type | \n",
+ " hysterectomy_type | \n",
+ " 0.348222 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Histologic_type disease_type 0.539168\n",
+ "1 Histologic_type sample_type 0.530217\n",
+ "2 Histologic_type roots 0.525866\n",
+ "3 Histologic_type history_of_tumor_type 0.524959\n",
+ "4 Histologic_type additional_pathology_findings 0.517125\n",
+ "5 Histologic_type specimen_type 0.511611\n",
+ "6 Histologic_type morphologic_architectural_pattern 0.482257\n",
+ "7 Histologic_type histone_variant 0.478656\n",
+ "8 Histologic_type viral_hepatitis_serologies 0.471346\n",
+ "9 Histologic_type chromosome 0.471309\n",
+ "10 Histologic_type analyte_type_id 0.450211\n",
+ "11 Histologic_type composition 0.446170\n",
+ "12 Histologic_type described_cases 0.445148\n",
+ "13 Histologic_type pathology_details 0.434320\n",
+ "14 Histologic_type antigen 0.432611\n",
+ "15 Histologic_type pathology_reports 0.429676\n",
+ "16 Histologic_type cog_rhabdomyosarcoma_risk_group 0.420320\n",
+ "17 Histologic_type biospecimen_type 0.420141\n",
+ "18 Histologic_type analyte_type 0.403565\n",
+ "19 Histologic_type single_cell_library 0.403199\n",
+ "20 Histologic_type laboratory_test 0.399581\n",
+ "21 Histologic_type slide_images 0.398477\n",
+ "22 Histologic_type tumor_descriptor 0.396314\n",
+ "23 Histologic_type sample_type_id 0.391940\n",
+ "24 Histologic_type relationship_primary_diagnosis 0.391895\n",
+ "25 Histologic_type icd_10_code 0.388311\n",
+ "26 Histologic_type stain_type 0.385822\n",
+ "27 Histologic_type histone_family 0.385273\n",
+ "28 Histologic_type pathogenicity 0.383606\n",
+ "29 Histologic_type contiguous_organ_invaded 0.383036\n",
+ "30 Histologic_type aids_risk_factors 0.382602\n",
+ "31 Histologic_type icd_10 0.375303\n",
+ "32 Histologic_type biospecimen_anatomic_site 0.372609\n",
+ "33 Histologic_type primary_diagnosis 0.371001\n",
+ "34 Histologic_type dysplasia_type 0.369152\n",
+ "35 Histologic_type tissue_type 0.367687\n",
+ "36 Histologic_type gene_symbol 0.366179\n",
+ "37 Histologic_type sarcomatoid_present 0.354912\n",
+ "38 Histologic_type papillary_renal_cell_type 0.351789\n",
+ "39 Histologic_type hysterectomy_type 0.348222"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hist_type_matches = bdi.top_matches(dataset, columns=[\"Histologic_type\"], target=\"gdc\", top_k=40)\n",
+ "hist_type_matches"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Viewing the column domains\n",
+ "\n",
+ "To verify that `primary_diagnosis` is a good target column, we view and compare the domains of each column using the `preview_domain()` function. For the source table, it returns the list of unique values in the source column. For the GDC target, it returns the list of unique valid values that a column can have.\n",
+ "\n",
+ "Here we see that the values seem to be related."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Serous | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Clear cell | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name\n",
+ "0 Endometrioid\n",
+ "1 Carcinosarcoma\n",
+ "2 Serous\n",
+ "3 Clear cell"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(dataset, \"Histologic_type\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ " value_description | \n",
+ " column_description | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Abdominal desmoid | \n",
+ " An insidious poorly circumscribed neoplasm ari... | \n",
+ " Text term used to describe the patient's histo... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Abdominal fibromatosis | \n",
+ " An insidious poorly circumscribed neoplasm ari... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Achromic nevus | \n",
+ " A benign nevus characterized by the absence of... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Acidophil adenocarcinoma | \n",
+ " A malignant epithelial neoplasm of the anterio... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Acidophil adenoma | \n",
+ " An epithelial neoplasm of the anterior pituita... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2620 | \n",
+ " Wolffian duct tumor | \n",
+ " An epithelial neoplasm of the female reproduct... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2621 | \n",
+ " Xanthofibroma | \n",
+ " A benign neoplasm composed of fibroblastic spi... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2622 | \n",
+ " Yolk sac tumor | \n",
+ " A non-seminomatous malignant germ cell tumor c... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2623 | \n",
+ " Unknown | \n",
+ " Not known, not observed, not recorded, or refu... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2624 | \n",
+ " Not Reported | \n",
+ " Not provided or available. | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2625 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name \\\n",
+ "0 Abdominal desmoid \n",
+ "1 Abdominal fibromatosis \n",
+ "2 Achromic nevus \n",
+ "3 Acidophil adenocarcinoma \n",
+ "4 Acidophil adenoma \n",
+ "... ... \n",
+ "2620 Wolffian duct tumor \n",
+ "2621 Xanthofibroma \n",
+ "2622 Yolk sac tumor \n",
+ "2623 Unknown \n",
+ "2624 Not Reported \n",
+ "\n",
+ " value_description \\\n",
+ "0 An insidious poorly circumscribed neoplasm ari... \n",
+ "1 An insidious poorly circumscribed neoplasm ari... \n",
+ "2 A benign nevus characterized by the absence of... \n",
+ "3 A malignant epithelial neoplasm of the anterio... \n",
+ "4 An epithelial neoplasm of the anterior pituita... \n",
+ "... ... \n",
+ "2620 An epithelial neoplasm of the female reproduct... \n",
+ "2621 A benign neoplasm composed of fibroblastic spi... \n",
+ "2622 A non-seminomatous malignant germ cell tumor c... \n",
+ "2623 Not known, not observed, not recorded, or refu... \n",
+ "2624 Not provided or available. \n",
+ "\n",
+ " column_description \n",
+ "0 Text term used to describe the patient's histo... \n",
+ "1 \n",
+ "2 \n",
+ "3 \n",
+ "4 \n",
+ "... ... \n",
+ "2620 \n",
+ "2621 \n",
+ "2622 \n",
+ "2623 \n",
+ "2624 \n",
+ "\n",
+ "[2625 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(\"gdc\", \"primary_diagnosis\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since `primary_diagnosis` looks like a correct match for `Histologic_type`, we can modify the `column_mappings` variable directly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Country | \n",
+ " country_of_birth | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Histologic_type | \n",
+ " primary_diagnosis | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " FIGO_stage | \n",
+ " irs_stage | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BMI | \n",
+ " age_at_diagnosis | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Age | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Race | \n",
+ " race | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Ethnicity | \n",
+ " ethnicity | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Gender | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Tumor_Focality | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Tumor_Size_cm | \n",
+ " tumor_depth | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Tumor_Site | \n",
+ " tumor_shape | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 Country country_of_birth\n",
+ "1 Histologic_type primary_diagnosis\n",
+ "2 FIGO_stage irs_stage\n",
+ "3 BMI age_at_diagnosis\n",
+ "4 Age weight\n",
+ "5 Race race\n",
+ "6 Ethnicity ethnicity\n",
+ "7 Gender gender\n",
+ "8 Tumor_Focality tumor_focality\n",
+ "9 Tumor_Size_cm tumor_depth\n",
+ "10 Tumor_Site tumor_shape"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "column_mappings.loc[column_mappings[\"source\"] == \"Histologic_type\", \"target\"] = \"primary_diagnosis\"\n",
+ "column_mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Finding correct value mappings\n",
+ "\n",
+ "After finding the correct column, we need to find appropriate value mappings. \n",
+ "Using `match_values()`, we can inspect what the possible value mappings for this would look like after the harmonization.\n",
+ "\n",
+ "`bdi-kit` implements multiple methods for value mapping discovery, including:\n",
+ "\n",
+ " - `edit_distance` - Computes value similarities using Levenshtein's edit distance measure.\n",
+ " - `tfidf` - A method based on tf-idf importance weighting computed over character n-grams.\n",
+ " - `embedding` - Uses BERT word embeddings to compute \"semantic similarity\" between the values.\n",
+ "\n",
+ "To specify a value mapping approach, we can pass the `method` parameter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ " 0.848485 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Clear cell | \n",
+ " Clear cell adenoma | \n",
+ " 0.714286 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " Stromal endometriosis | \n",
+ " 0.666667 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Neuronevus | \n",
+ " 0.625000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS 0.848485\n",
+ "1 Clear cell Clear cell adenoma 0.714286\n",
+ "2 Endometrioid Stromal endometriosis 0.666667\n",
+ "3 Serous Neuronevus 0.625000"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"edit_distance\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ " 0.969 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " 0.897 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Clear cell adenoma | \n",
+ " 0.853 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous carcinoma, NOS | \n",
+ " 0.755 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS 0.969\n",
+ "1 Endometrioid Endometrioid adenoma, NOS 0.897\n",
+ "2 Clear cell Clear cell adenoma 0.853\n",
+ "3 Serous Serous carcinoma, NOS 0.755"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"tfidf\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinofibroma | \n",
+ " 0.919 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Endometrioid cystadenocarcinoma | \n",
+ " 0.810 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Clear cell carcinoma | \n",
+ " 0.760 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous cystoma | \n",
+ " 0.661 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinofibroma 0.919\n",
+ "1 Endometrioid Endometrioid cystadenocarcinoma 0.810\n",
+ "2 Clear cell Clear cell carcinoma 0.760\n",
+ "3 Serous Serous cystoma 0.661"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"embedding\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Clear cell | \n",
+ " Clear cell adenocarcinoma, NOS | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " Endometrioid carcinoma | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous cystadenocarcinoma | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS\n",
+ "1 Clear cell Clear cell adenocarcinoma, NOS\n",
+ "2 Endometrioid Endometrioid carcinoma\n",
+ "3 Serous Serous cystadenocarcinoma"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hist_type_vmap = pd.DataFrame(\n",
+ " columns=[\"source\", \"target\"],\n",
+ " data=[\n",
+ " (\"Carcinosarcoma\", \"Carcinosarcoma, NOS\"),\n",
+ " (\"Clear cell\", \"Clear cell adenocarcinoma, NOS\"),\n",
+ " (\"Endometrioid\", \"Endometrioid carcinoma\"),\n",
+ " (\"Serous\", \"Serous cystadenocarcinoma\"),\n",
+ " ],\n",
+ ")\n",
+ "hist_type_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Verifying multiple value mappings at once\n",
+ "\n",
+ "Besides verifying value mappings individually, you can also do it for all column mappings at once."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Histologic_type | \n",
+ " disease_type | \n",
+ " 0.539168 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Histologic_type | \n",
+ " sample_type | \n",
+ " 0.530217 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Histologic_type | \n",
+ " roots | \n",
+ " 0.525866 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Histologic_type | \n",
+ " history_of_tumor_type | \n",
+ " 0.524959 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Histologic_type | \n",
+ " additional_pathology_findings | \n",
+ " 0.517125 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Histologic_type | \n",
+ " specimen_type | \n",
+ " 0.511611 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Histologic_type | \n",
+ " morphologic_architectural_pattern | \n",
+ " 0.482257 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Histologic_type | \n",
+ " histone_variant | \n",
+ " 0.478656 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Histologic_type | \n",
+ " viral_hepatitis_serologies | \n",
+ " 0.471346 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Histologic_type | \n",
+ " chromosome | \n",
+ " 0.471309 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Histologic_type disease_type 0.539168\n",
+ "1 Histologic_type sample_type 0.530217\n",
+ "2 Histologic_type roots 0.525866\n",
+ "3 Histologic_type history_of_tumor_type 0.524959\n",
+ "4 Histologic_type additional_pathology_findings 0.517125\n",
+ "5 Histologic_type specimen_type 0.511611\n",
+ "6 Histologic_type morphologic_architectural_pattern 0.482257\n",
+ "7 Histologic_type histone_variant 0.478656\n",
+ "8 Histologic_type viral_hepatitis_serologies 0.471346\n",
+ "9 Histologic_type chromosome 0.471309"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hist_type_matches"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => disease_type (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Serous | \n",
+ " Cystic, Mucinous and Serous Neoplasms | \n",
+ " 0.563 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " Soft Tissue Tumors and Sarcomas, NOS | \n",
+ " 0.450 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Acinar Cell Neoplasms | \n",
+ " 0.319 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Serous Cystic, Mucinous and Serous Neoplasms 0.563\n",
+ "1 Carcinosarcoma Soft Tissue Tumors and Sarcomas, NOS 0.450\n",
+ "2 Clear cell Acinar Cell Neoplasms 0.319\n",
+ "3 Endometrioid None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => sample_type (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Mononuclear Cells from Bone Marrow Normal | \n",
+ " 0.502 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Mononuclear Cells from Bone Marrow Normal 0.502\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => history_of_tumor_type (coverage: 50.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Colorectal Cancer | \n",
+ " 0.313 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " Phenochromocytoma or Paraganglioma | \n",
+ " 0.309 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Colorectal Cancer 0.313\n",
+ "1 Carcinosarcoma Phenochromocytoma or Paraganglioma 0.309\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => additional_pathology_findings (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ " Endometriosis | \n",
+ " 0.746 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinoma in situ | \n",
+ " 0.643 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Diffuse and early nodular diabetic glomerulosc... | \n",
+ " 0.273 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target \\\n",
+ "0 Endometrioid Endometriosis \n",
+ "1 Carcinosarcoma Carcinoma in situ \n",
+ "2 Clear cell Diffuse and early nodular diabetic glomerulosc... \n",
+ "3 Serous None \n",
+ "\n",
+ " similarity \n",
+ "0 0.746 \n",
+ "1 0.643 \n",
+ "2 0.273 \n",
+ "3 NaN "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => specimen_type (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Cell | \n",
+ " 0.615 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Serous | \n",
+ " Serum | \n",
+ " 0.369 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Carcinosarcoma | \n",
+ " Bone Marrow Components NOS | \n",
+ " 0.272 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Cell 0.615\n",
+ "1 Serous Serum 0.369\n",
+ "2 Carcinosarcoma Bone Marrow Components NOS 0.272\n",
+ "3 Endometrioid None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => morphologic_architectural_pattern (coverage: 50.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Papillary Renal Cell | \n",
+ " 0.516 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " Papillary, NOS | \n",
+ " 0.259 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Papillary Renal Cell 0.516\n",
+ "1 Carcinosarcoma Papillary, NOS 0.259\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => histone_variant (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ " Not Reported | \n",
+ " 0.269 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Endometrioid Not Reported 0.269\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Clear cell None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => viral_hepatitis_serologies (coverage: 0.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Serous | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Serous None None\n",
+ "1 Carcinosarcoma None None\n",
+ "2 Clear cell None None\n",
+ "3 Endometrioid None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => chromosome (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ " Not Reported | \n",
+ " 0.28 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Endometrioid Not Reported 0.28\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Clear cell None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => analyte_type_id (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Serous | \n",
+ " S | \n",
+ " 0.414 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " D | \n",
+ " 0.298 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " E | \n",
+ " 0.269 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Serous S 0.414\n",
+ "1 Endometrioid D 0.298\n",
+ "2 Clear cell E 0.269\n",
+ "3 Carcinosarcoma None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => composition (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Cell | \n",
+ " 0.621 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Serous | \n",
+ " Serum | \n",
+ " 0.374 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Carcinosarcoma | \n",
+ " Bone Marrow Components NOS | \n",
+ " 0.279 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Cell 0.621\n",
+ "1 Serous Serum 0.374\n",
+ "2 Carcinosarcoma Bone Marrow Components NOS 0.279\n",
+ "3 Endometrioid None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => antigen (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Squamous Cell Carcinoma Antigen (SCCA) | \n",
+ " 0.514 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Clear cell | \n",
+ " CEA | \n",
+ " 0.342 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Serous | \n",
+ " NSE | \n",
+ " 0.280 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Squamous Cell Carcinoma Antigen (SCCA) 0.514\n",
+ "1 Clear cell CEA 0.342\n",
+ "2 Serous NSE 0.280\n",
+ "3 Endometrioid None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => cog_rhabdomyosarcoma_risk_group (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ " Intermediate Risk | \n",
+ " 0.277 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Endometrioid Intermediate Risk 0.277\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Clear cell None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => biospecimen_type (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Serous | \n",
+ " Serum | \n",
+ " 0.365 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " Buccal Mucosa | \n",
+ " 0.295 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Muscle Tissue | \n",
+ " 0.271 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Serous Serum 0.365\n",
+ "1 Carcinosarcoma Buccal Mucosa 0.295\n",
+ "2 Clear cell Muscle Tissue 0.271\n",
+ "3 Endometrioid None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => analyte_type (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Nuclei RNA | \n",
+ " 0.313 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Nuclei RNA 0.313\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => single_cell_library (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Chromium scATAC v1 Library | \n",
+ " 0.27 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Chromium scATAC v1 Library 0.27\n",
+ "1 Endometrioid None NaN\n",
+ "2 Clear cell None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => laboratory_test (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Cellularity | \n",
+ " 0.392 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Cellularity 0.392\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => tumor_descriptor (coverage: 50.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Not Allowed To Collect | \n",
+ " 0.382 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " NOS | \n",
+ " 0.302 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Not Allowed To Collect 0.382\n",
+ "1 Carcinosarcoma NOS 0.302\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => sample_type_id (coverage: 0.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Serous | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Serous None None\n",
+ "1 Carcinosarcoma None None\n",
+ "2 Clear cell None None\n",
+ "3 Endometrioid None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => relationship_primary_diagnosis (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Sarcoma | \n",
+ " 0.697 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Clear cell | \n",
+ " Basal Cell Cancer | \n",
+ " 0.461 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " Thyroid Cancer | \n",
+ " 0.253 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Sarcoma 0.697\n",
+ "1 Clear cell Basal Cell Cancer 0.461\n",
+ "2 Endometrioid Thyroid Cancer 0.253\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => stain_type (coverage: 0.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Serous | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Serous None None\n",
+ "1 Carcinosarcoma None None\n",
+ "2 Clear cell None None\n",
+ "3 Endometrioid None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => histone_family (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ " Not Reported | \n",
+ " 0.258 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Endometrioid Not Reported 0.258\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Clear cell None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => pathogenicity (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Likely Pathogenic | \n",
+ " 0.252 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Likely Pathogenic 0.252\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => contiguous_organ_invaded (coverage: 25.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Small Bowel | \n",
+ " 0.262 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Small Bowel 0.262\n",
+ "1 Carcinosarcoma None NaN\n",
+ "2 Endometrioid None NaN\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => aids_risk_factors (coverage: 75.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ " Coccidioidomycosis | \n",
+ " 0.377 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Clear cell | \n",
+ " Salmonella Septicemia | \n",
+ " 0.305 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Carcinosarcoma | \n",
+ " Nocardiosis | \n",
+ " 0.283 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Endometrioid Coccidioidomycosis 0.377\n",
+ "1 Clear cell Salmonella Septicemia 0.305\n",
+ "2 Carcinosarcoma Nocardiosis 0.283\n",
+ "3 Serous None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => biospecimen_anatomic_site (coverage: 100.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Clear cell | \n",
+ " Cell-Line | \n",
+ " 0.474 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Abdomen | \n",
+ " 0.376 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Serous | \n",
+ " Venous | \n",
+ " 0.318 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Carcinosarcoma | \n",
+ " Stomach - Mucosa Only | \n",
+ " 0.270 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Clear cell Cell-Line 0.474\n",
+ "1 Endometrioid Abdomen 0.376\n",
+ "2 Serous Venous 0.318\n",
+ "3 Carcinosarcoma Stomach - Mucosa Only 0.270"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Histologic_type => primary_diagnosis (coverage: 100.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ " 0.969 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " 0.897 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Clear cell adenoma | \n",
+ " 0.853 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous carcinoma, NOS | \n",
+ " 0.755 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS 0.969\n",
+ "1 Endometrioid Endometrioid adenoma, NOS 0.897\n",
+ "2 Clear cell Clear cell adenoma 0.853\n",
+ "3 Serous Serous carcinoma, NOS 0.755"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Match the source values against each candidate target column listed in the\n",
+ "# first 34 rows of hist_type_matches, using the \"tfidf\" method on the GDC target.\n",
+ "mappings = bdi.match_values(\n",
+ "    dataset, column_mapping=hist_type_matches.head(34), target=\"gdc\", method=\"tfidf\"\n",
+ ")\n",
+ "\n",
+ "# Tag each mapping, then show its source/target columns, coverage and value matches.\n",
+ "# NOTE(review): default_missing is presumably the fill value for unmatched source\n",
+ "# values when the mapping is applied later - confirm against the bdi documentation.\n",
+ "for mapping in mappings:\n",
+ "    mapping.attrs.update(default_missing=\"___\")\n",
+ "    print(f\"{mapping.attrs['source']} => {mapping.attrs['target']} (coverage: {mapping.attrs['coverage']:.2%})\")\n",
+ "    display(mapping)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ " 0.969 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " 0.897 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Clear cell adenoma | \n",
+ " 0.853 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous carcinoma, NOS | \n",
+ " 0.755 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS 0.969\n",
+ "1 Endometrioid Endometrioid adenoma, NOS 0.897\n",
+ "2 Clear cell Clear cell adenoma 0.853\n",
+ "3 Serous Serous carcinoma, NOS 0.755"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Rank the candidate value mappings by their total similarity (highest first) and\n",
+ "# show the best one. Ties keep their original relative order (stable sort).\n",
+ "# NOTE(review): unmatched rows carry NaN/None similarity; this presumably relies on\n",
+ "# the pandas sum skipping missing values - confirm.\n",
+ "scores = [np.sum(m[\"similarity\"]) for m in mappings]\n",
+ "sorted_mappings = [\n",
+ "    candidate\n",
+ "    for _score, candidate in sorted(\n",
+ "        zip(scores, mappings), key=lambda pair: pair[0], reverse=True\n",
+ "    )\n",
+ "]\n",
+ "sorted_mappings[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "100%|██████████| 1/1 [00:00<00:00, 11.98it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 1 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 734/734 [00:52<00:00, 13.92it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 734 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " uicc_pathologic_t | \n",
+ " 0.677620 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ajcc_pathologic_t | \n",
+ " 0.660834 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ensat_pathologic_t | \n",
+ " 0.660739 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " uicc_clinical_t | \n",
+ " 0.645708 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ajcc_clinical_t | \n",
+ " 0.592968 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " extrathyroid_extension | \n",
+ " 0.589977 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " uicc_pathologic_stage | \n",
+ " 0.584754 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " somatic_mutation_indexes | \n",
+ " 0.571665 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ajcc_pathologic_stage | \n",
+ " 0.563254 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " annotated_somatic_mutations | \n",
+ " 0.563235 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " masked_somatic_mutations | \n",
+ " 0.536071 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " figo_stage | \n",
+ " 0.535757 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " tumor_level_prostate | \n",
+ " 0.525700 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ensat_pathologic_stage | \n",
+ " 0.524734 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " uicc_clinical_stage | \n",
+ " 0.512399 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " tumor_grade_category | \n",
+ " 0.510672 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ajcc_clinical_stage | \n",
+ " 0.501770 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " inss_stage | \n",
+ " 0.477118 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " adapter_sequence | \n",
+ " 0.473948 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " margin_distance | \n",
+ " 0.438898 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " masaoka_stage | \n",
+ " 0.434978 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " enneking_msts_tumor_site | \n",
+ " 0.433908 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " tumor_regression_grade | \n",
+ " 0.421245 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " tumor_grade | \n",
+ " 0.401092 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " aggregated_somatic_mutations | \n",
+ " 0.397975 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " fastq_name | \n",
+ " 0.391604 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " route_of_administration | \n",
+ " 0.388670 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " distance_normal_to_tumor | \n",
+ " 0.381779 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " target_capture_kit_catalog_number | \n",
+ " 0.378923 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " aa_change | \n",
+ " 0.376404 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " submitted_aligned_reads_files | \n",
+ " 0.375468 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " shortest_dimension | \n",
+ " 0.371787 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " submitted_genomic_profiles | \n",
+ " 0.371026 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " best_overall_response | \n",
+ " 0.366447 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " data_subtypes | \n",
+ " 0.363905 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " ann_arbor_pathologic_stage | \n",
+ " 0.361731 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " zone_of_origin_prostate | \n",
+ " 0.360281 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " simple_germline_variations | \n",
+ " 0.358423 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " adverse_event_grade | \n",
+ " 0.357383 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " simple_somatic_mutations | \n",
+ " 0.353099 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Path_Stage_Primary_Tumor-pT uicc_pathologic_t 0.677620\n",
+ "1 Path_Stage_Primary_Tumor-pT ajcc_pathologic_t 0.660834\n",
+ "2 Path_Stage_Primary_Tumor-pT ensat_pathologic_t 0.660739\n",
+ "3 Path_Stage_Primary_Tumor-pT uicc_clinical_t 0.645708\n",
+ "4 Path_Stage_Primary_Tumor-pT ajcc_clinical_t 0.592968\n",
+ "5 Path_Stage_Primary_Tumor-pT extrathyroid_extension 0.589977\n",
+ "6 Path_Stage_Primary_Tumor-pT uicc_pathologic_stage 0.584754\n",
+ "7 Path_Stage_Primary_Tumor-pT somatic_mutation_indexes 0.571665\n",
+ "8 Path_Stage_Primary_Tumor-pT ajcc_pathologic_stage 0.563254\n",
+ "9 Path_Stage_Primary_Tumor-pT annotated_somatic_mutations 0.563235\n",
+ "10 Path_Stage_Primary_Tumor-pT masked_somatic_mutations 0.536071\n",
+ "11 Path_Stage_Primary_Tumor-pT figo_stage 0.535757\n",
+ "12 Path_Stage_Primary_Tumor-pT tumor_level_prostate 0.525700\n",
+ "13 Path_Stage_Primary_Tumor-pT ensat_pathologic_stage 0.524734\n",
+ "14 Path_Stage_Primary_Tumor-pT uicc_clinical_stage 0.512399\n",
+ "15 Path_Stage_Primary_Tumor-pT tumor_grade_category 0.510672\n",
+ "16 Path_Stage_Primary_Tumor-pT ajcc_clinical_stage 0.501770\n",
+ "17 Path_Stage_Primary_Tumor-pT inss_stage 0.477118\n",
+ "18 Path_Stage_Primary_Tumor-pT adapter_sequence 0.473948\n",
+ "19 Path_Stage_Primary_Tumor-pT margin_distance 0.438898\n",
+ "20 Path_Stage_Primary_Tumor-pT masaoka_stage 0.434978\n",
+ "21 Path_Stage_Primary_Tumor-pT enneking_msts_tumor_site 0.433908\n",
+ "22 Path_Stage_Primary_Tumor-pT tumor_regression_grade 0.421245\n",
+ "23 Path_Stage_Primary_Tumor-pT tumor_grade 0.401092\n",
+ "24 Path_Stage_Primary_Tumor-pT aggregated_somatic_mutations 0.397975\n",
+ "25 Path_Stage_Primary_Tumor-pT fastq_name 0.391604\n",
+ "26 Path_Stage_Primary_Tumor-pT route_of_administration 0.388670\n",
+ "27 Path_Stage_Primary_Tumor-pT distance_normal_to_tumor 0.381779\n",
+ "28 Path_Stage_Primary_Tumor-pT target_capture_kit_catalog_number 0.378923\n",
+ "29 Path_Stage_Primary_Tumor-pT aa_change 0.376404\n",
+ "30 Path_Stage_Primary_Tumor-pT submitted_aligned_reads_files 0.375468\n",
+ "31 Path_Stage_Primary_Tumor-pT shortest_dimension 0.371787\n",
+ "32 Path_Stage_Primary_Tumor-pT submitted_genomic_profiles 0.371026\n",
+ "33 Path_Stage_Primary_Tumor-pT best_overall_response 0.366447\n",
+ "34 Path_Stage_Primary_Tumor-pT data_subtypes 0.363905\n",
+ "35 Path_Stage_Primary_Tumor-pT ann_arbor_pathologic_stage 0.361731\n",
+ "36 Path_Stage_Primary_Tumor-pT zone_of_origin_prostate 0.360281\n",
+ "37 Path_Stage_Primary_Tumor-pT simple_germline_variations 0.358423\n",
+ "38 Path_Stage_Primary_Tumor-pT adverse_event_grade 0.357383\n",
+ "39 Path_Stage_Primary_Tumor-pT simple_somatic_mutations 0.353099"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "path_stage_matches = bdi.top_matches(dataset, columns=[\"Path_Stage_Primary_Tumor-pT\"], target=\"gdc\", top_k=40)\n",
+ "path_stage_matches"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => uicc_pathologic_t (coverage: 100.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT1b (FIGO IB) | \n",
+ " T1b | \n",
+ " 0.534 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT1a (FIGO IA) | \n",
+ " T1a | \n",
+ " 0.509 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " T3b | \n",
+ " 0.436 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " T3a | \n",
+ " 0.418 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " Unknown | \n",
+ " 0.373 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT1 (FIGO I) | \n",
+ " T1 | \n",
+ " 0.289 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT2 (FIGO II) | \n",
+ " T2 | \n",
+ " 0.281 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT1b (FIGO IB) T1b 0.534\n",
+ "1 pT1a (FIGO IA) T1a 0.509\n",
+ "2 pT3b (FIGO IIIB) T3b 0.436\n",
+ "3 pT3a (FIGO IIIA) T3a 0.418\n",
+ "4 nan Unknown 0.373\n",
+ "5 pT1 (FIGO I) T1 0.289\n",
+ "6 pT2 (FIGO II) T2 0.281"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => ajcc_pathologic_t (coverage: 100.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT1b (FIGO IB) | \n",
+ " T1b | \n",
+ " 0.535 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT1a (FIGO IA) | \n",
+ " T1a | \n",
+ " 0.507 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " T3b | \n",
+ " 0.437 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " T3a | \n",
+ " 0.416 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " Unknown | \n",
+ " 0.348 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT1 (FIGO I) | \n",
+ " T1 | \n",
+ " 0.290 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT2 (FIGO II) | \n",
+ " T2 | \n",
+ " 0.281 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT1b (FIGO IB) T1b 0.535\n",
+ "1 pT1a (FIGO IA) T1a 0.507\n",
+ "2 pT3b (FIGO IIIB) T3b 0.437\n",
+ "3 pT3a (FIGO IIIA) T3a 0.416\n",
+ "4 nan Unknown 0.348\n",
+ "5 pT1 (FIGO I) T1 0.290\n",
+ "6 pT2 (FIGO II) T2 0.281"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => ensat_pathologic_t (coverage: 85.71%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT1 (FIGO I) | \n",
+ " T1 | \n",
+ " 0.424 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT2 (FIGO II) | \n",
+ " T2 | \n",
+ " 0.424 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pT1a (FIGO IA) | \n",
+ " T1 | \n",
+ " 0.311 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT1b (FIGO IB) | \n",
+ " T1 | \n",
+ " 0.302 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " T3 | \n",
+ " 0.260 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " T3 | \n",
+ " 0.256 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT1 (FIGO I) T1 0.424\n",
+ "1 pT2 (FIGO II) T2 0.424\n",
+ "2 pT1a (FIGO IA) T1 0.311\n",
+ "3 pT1b (FIGO IB) T1 0.302\n",
+ "4 pT3a (FIGO IIIA) T3 0.260\n",
+ "5 pT3b (FIGO IIIB) T3 0.256\n",
+ "6 nan None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => uicc_clinical_t (coverage: 100.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT1b (FIGO IB) | \n",
+ " T1b | \n",
+ " 0.536 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT1a (FIGO IA) | \n",
+ " T1a | \n",
+ " 0.512 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " T3b | \n",
+ " 0.437 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " T3a | \n",
+ " 0.418 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " Unknown | \n",
+ " 0.373 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT1 (FIGO I) | \n",
+ " T1 | \n",
+ " 0.297 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT2 (FIGO II) | \n",
+ " T2 | \n",
+ " 0.285 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT1b (FIGO IB) T1b 0.536\n",
+ "1 pT1a (FIGO IA) T1a 0.512\n",
+ "2 pT3b (FIGO IIIB) T3b 0.437\n",
+ "3 pT3a (FIGO IIIA) T3a 0.418\n",
+ "4 nan Unknown 0.373\n",
+ "5 pT1 (FIGO I) T1 0.297\n",
+ "6 pT2 (FIGO II) T2 0.285"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => ajcc_clinical_t (coverage: 100.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT1b (FIGO IB) | \n",
+ " T1b | \n",
+ " 0.537 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT1a (FIGO IA) | \n",
+ " T1a | \n",
+ " 0.509 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " T3b | \n",
+ " 0.437 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " T3a | \n",
+ " 0.416 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " Unknown | \n",
+ " 0.348 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT1 (FIGO I) | \n",
+ " T1 | \n",
+ " 0.298 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT2 (FIGO II) | \n",
+ " T2 | \n",
+ " 0.285 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT1b (FIGO IB) T1b 0.537\n",
+ "1 pT1a (FIGO IA) T1a 0.509\n",
+ "2 pT3b (FIGO IIIB) T3b 0.437\n",
+ "3 pT3a (FIGO IIIA) T3a 0.416\n",
+ "4 nan Unknown 0.348\n",
+ "5 pT1 (FIGO I) T1 0.298\n",
+ "6 pT2 (FIGO II) T2 0.285"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => extrathyroid_extension (coverage: 0.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT1b (FIGO IB) | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT1 (FIGO I) | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT1a (FIGO IA) | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT2 (FIGO II) | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT1b (FIGO IB) None None\n",
+ "1 pT3b (FIGO IIIB) None None\n",
+ "2 nan None None\n",
+ "3 pT1 (FIGO I) None None\n",
+ "4 pT3a (FIGO IIIA) None None\n",
+ "5 pT1a (FIGO IA) None None\n",
+ "6 pT2 (FIGO II) None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => uicc_pathologic_stage (coverage: 71.43%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " Stage IIIB | \n",
+ " 0.532 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " Stage IIIA | \n",
+ " 0.478 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " Unknown | \n",
+ " 0.379 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT1b (FIGO IB) | \n",
+ " Stage IB | \n",
+ " 0.339 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pT2 (FIGO II) | \n",
+ " Stage III | \n",
+ " 0.265 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT1a (FIGO IA) | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT1 (FIGO I) | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT3b (FIGO IIIB) Stage IIIB 0.532\n",
+ "1 pT3a (FIGO IIIA) Stage IIIA 0.478\n",
+ "2 nan Unknown 0.379\n",
+ "3 pT1b (FIGO IB) Stage IB 0.339\n",
+ "4 pT2 (FIGO II) Stage III 0.265\n",
+ "5 pT1a (FIGO IA) None NaN\n",
+ "6 pT1 (FIGO I) None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Path_Stage_Primary_Tumor-pT => ajcc_pathologic_stage (coverage: 71.43%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " pT3b (FIGO IIIB) | \n",
+ " Stage IIIB | \n",
+ " 0.532 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " Stage IIIA | \n",
+ " 0.478 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " Unknown | \n",
+ " 0.379 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT1b (FIGO IB) | \n",
+ " Stage IB | \n",
+ " 0.339 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pT2 (FIGO II) | \n",
+ " Stage III | \n",
+ " 0.265 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pT1a (FIGO IA) | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT1 (FIGO I) | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 pT3b (FIGO IIIB) Stage IIIB 0.532\n",
+ "1 pT3a (FIGO IIIA) Stage IIIA 0.478\n",
+ "2 nan Unknown 0.379\n",
+ "3 pT1b (FIGO IB) Stage IB 0.339\n",
+ "4 pT2 (FIGO II) Stage III 0.265\n",
+ "5 pT1a (FIGO IA) None NaN\n",
+ "6 pT1 (FIGO I) None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "mappings = bdi.match_values(\n",
+ " dataset,\n",
+ " column_mapping=path_stage_matches.head(10),\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ " # default_missing=None\n",
+ ")\n",
+ "\n",
+ "for mapping in mappings:\n",
+ " mapping.attrs['default_missing'] = \"___\"\n",
+ " print(f\"{mapping.attrs['source']} => {mapping.attrs['target']} (coverage: {mapping.attrs['coverage']:.2%})\")\n",
+ " display(mapping)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'mappings' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 2\u001b[0m scores \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 3\u001b[0m np\u001b[38;5;241m.\u001b[39msum(m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msimilarity\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m m \u001b[38;5;129;01min\u001b[39;00m \u001b[43mmappings\u001b[49m\n\u001b[1;32m 5\u001b[0m ]\n\u001b[1;32m 6\u001b[0m scores\n\u001b[1;32m 7\u001b[0m sorted_mappings \u001b[38;5;241m=\u001b[39m [m \u001b[38;5;28;01mfor\u001b[39;00m _, m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(\u001b[38;5;28mzip\u001b[39m(scores, mappings), key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mlambda\u001b[39;00m it: it[\u001b[38;5;241m0\u001b[39m], reverse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)]\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'mappings' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "scores = [\n",
+ " np.sum(m[\"similarity\"])\n",
+ " for m in mappings\n",
+ "]\n",
+ "scores\n",
+ "sorted_mappings = [m for _, m in sorted(zip(scores, mappings), key=lambda it: it[0], reverse=True)]\n",
+ "print(scores[:2])\n",
+ "print(sorted_mappings[1].attrs[\"target\"])\n",
+ "sorted_mappings[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "100%|██████████| 1/1 [00:00<00:00, 17.54it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 1 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 734/734 [00:48<00:00, 15.13it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 734 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Country | \n",
+ " country_of_birth | \n",
+ " 0.491808 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Country | \n",
+ " country_of_residence_at_enrollment | \n",
+ " 0.419452 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Country | \n",
+ " oct_embedded | \n",
+ " 0.371549 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Country | \n",
+ " race | \n",
+ " 0.352190 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Country | \n",
+ " submission_enabled | \n",
+ " 0.325194 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Country | \n",
+ " zone_of_origin_prostate | \n",
+ " 0.281882 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Country | \n",
+ " is_legacy | \n",
+ " 0.279952 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Country | \n",
+ " released | \n",
+ " 0.271991 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Country | \n",
+ " non_nodal_regional_disease | \n",
+ " 0.268099 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Country | \n",
+ " perineural_invasion_present | \n",
+ " 0.225525 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Country | \n",
+ " ethnicity | \n",
+ " 0.217912 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Country | \n",
+ " consent_type | \n",
+ " 0.210355 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Country | \n",
+ " request_submission | \n",
+ " 0.208036 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Country | \n",
+ " releasable | \n",
+ " 0.207206 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Country | \n",
+ " project_id | \n",
+ " 0.204526 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Country | \n",
+ " overrepresented_sequences | \n",
+ " 0.203413 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Country | \n",
+ " year_of_diagnosis | \n",
+ " 0.192407 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Country | \n",
+ " status | \n",
+ " 0.189337 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Country | \n",
+ " ulceration_indicator | \n",
+ " 0.188119 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Country | \n",
+ " vascular_invasion_present | \n",
+ " 0.188106 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Country country_of_birth 0.491808\n",
+ "1 Country country_of_residence_at_enrollment 0.419452\n",
+ "2 Country oct_embedded 0.371549\n",
+ "3 Country race 0.352190\n",
+ "4 Country submission_enabled 0.325194\n",
+ "5 Country zone_of_origin_prostate 0.281882\n",
+ "6 Country is_legacy 0.279952\n",
+ "7 Country released 0.271991\n",
+ "8 Country non_nodal_regional_disease 0.268099\n",
+ "9 Country perineural_invasion_present 0.225525\n",
+ "10 Country ethnicity 0.217912\n",
+ "11 Country consent_type 0.210355\n",
+ "12 Country request_submission 0.208036\n",
+ "13 Country releasable 0.207206\n",
+ "14 Country project_id 0.204526\n",
+ "15 Country overrepresented_sequences 0.203413\n",
+ "16 Country year_of_diagnosis 0.192407\n",
+ "17 Country status 0.189337\n",
+ "18 Country ulceration_indicator 0.188119\n",
+ "19 Country vascular_invasion_present 0.188106"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "country_matches = bdi.top_matches(dataset, columns=[\"Country\"], target=\"gdc\", top_k=20)\n",
+ "country_matches"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => country_of_birth (coverage: 60.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " United States | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Poland | \n",
+ " Poland | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " Ukraine | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States United States 1.0\n",
+ "1 Poland Poland 1.0\n",
+ "2 Ukraine Ukraine 1.0\n",
+ "3 nan None NaN\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => country_of_residence_at_enrollment (coverage: 60.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " United States | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Poland | \n",
+ " Poland | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " Ukraine | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States United States 1.0\n",
+ "1 Poland Poland 1.0\n",
+ "2 Ukraine Ukraine 1.0\n",
+ "3 nan None NaN\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => race (coverage: 80.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Other_specify | \n",
+ " other | \n",
+ " 0.502 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " american indian or alaska native | \n",
+ " 0.344 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " white | \n",
+ " 0.297 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " native hawaiian or other pacific islander | \n",
+ " 0.274 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Other_specify other 0.502\n",
+ "1 nan american indian or alaska native 0.344\n",
+ "2 United States white 0.297\n",
+ "3 Poland native hawaiian or other pacific islander 0.274\n",
+ "4 Ukraine None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => zone_of_origin_prostate (coverage: 20.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Other_specify | \n",
+ " Peripheral zone | \n",
+ " 0.288 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Other_specify Peripheral zone 0.288\n",
+ "1 nan None NaN\n",
+ "2 Ukraine None NaN\n",
+ "3 Poland None NaN\n",
+ "4 United States None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => non_nodal_regional_disease (coverage: 20.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Indeterminate | \n",
+ " 0.325 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States Indeterminate 0.325\n",
+ "1 nan None NaN\n",
+ "2 Ukraine None NaN\n",
+ "3 Poland None NaN\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => perineural_invasion_present (coverage: 20.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Not Reported | \n",
+ " 0.325 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States Not Reported 0.325\n",
+ "1 nan None NaN\n",
+ "2 Ukraine None NaN\n",
+ "3 Poland None NaN\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => ethnicity (coverage: 0.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan None None\n",
+ "1 United States None None\n",
+ "2 Ukraine None None\n",
+ "3 Poland None None\n",
+ "4 Other_specify None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => consent_type (coverage: 0.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan None None\n",
+ "1 United States None None\n",
+ "2 Ukraine None None\n",
+ "3 Poland None None\n",
+ "4 Other_specify None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => overrepresented_sequences (coverage: 20.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Not Reported | \n",
+ " 0.338 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States Not Reported 0.338\n",
+ "1 nan None NaN\n",
+ "2 Ukraine None NaN\n",
+ "3 Poland None NaN\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => status (coverage: 0.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " nan | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 nan None None\n",
+ "1 United States None None\n",
+ "2 Ukraine None None\n",
+ "3 Poland None None\n",
+ "4 Other_specify None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => ulceration_indicator (coverage: 20.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Not Reported | \n",
+ " 0.315 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States Not Reported 0.315\n",
+ "1 nan None NaN\n",
+ "2 Ukraine None NaN\n",
+ "3 Poland None NaN\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Country => vascular_invasion_present (coverage: 20.00%)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Not Reported | \n",
+ " 0.325 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other_specify | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 United States Not Reported 0.325\n",
+ "1 nan None NaN\n",
+ "2 Ukraine None NaN\n",
+ "3 Poland None NaN\n",
+ "4 Other_specify None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "mappings = bdi.match_values(\n",
+ " dataset,\n",
+ " column_mapping=country_matches,\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ " # default_missing=None\n",
+ ")\n",
+ "\n",
+ "for mapping in mappings:\n",
+ " print(f\"{mapping.attrs['source']} => {mapping.attrs['target']} (coverage: {mapping.attrs['coverage']:.2%})\")\n",
+ " display(mapping)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Fixing remaining value mappings\n",
+ "\n",
+ "We need to fix a few value mappings:\n",
+ "- Race\n",
+ "- Ethnicity\n",
+ "- Tumor_Site\n",
+ "\n",
+ "For race, we need to fix: `nan` -> `american indian or alaska native`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " White | \n",
+ " white | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Asian | \n",
+ " asian | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not Reported | \n",
+ " not reported | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Black or African American | \n",
+ " black or african american | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " american indian or alaska native | \n",
+ " 0.359 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 White white 1.000\n",
+ "1 Asian asian 1.000\n",
+ "2 Not Reported not reported 1.000\n",
+ "3 Black or African American black or african american 1.000\n",
+ "4 nan american indian or alaska native 0.359"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "race_vmap = bdi.match_values(\n",
+ " dataset,\n",
+ " column_mapping=(\"Race\", \"race\"),\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "race_vmap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " White | \n",
+ " white | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Asian | \n",
+ " asian | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not Reported | \n",
+ " not reported | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Black or African American | \n",
+ " black or african american | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 White white 1.0\n",
+ "1 Asian asian 1.0\n",
+ "2 Not Reported not reported 1.0\n",
+ "3 Black or African American black or african american 1.0"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "race_vmap = race_vmap[race_vmap[\"similarity\"] >= 1.0]\n",
+ "race_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For `Ethnicity`, we need to fix the incorrect mapping: `Not reported` -> `not hispanic or latino`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Hispanic or Latino | \n",
+ " hispanic or latino | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Not-Hispanic or Latino | \n",
+ " not hispanic or latino | \n",
+ " 0.935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not reported | \n",
+ " not hispanic or latino | \n",
+ " 0.268 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Hispanic or Latino hispanic or latino 1.000\n",
+ "1 Not-Hispanic or Latino not hispanic or latino 0.935\n",
+ "2 Not reported not hispanic or latino 0.268\n",
+ "3 nan None NaN"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ethinicity_vmap = bdi.match_values(\n",
+ " dataset,\n",
+ " column_mapping=(\"Ethnicity\", \"ethnicity\"),\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "ethinicity_vmap\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Hispanic or Latino | \n",
+ " hispanic or latino | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Not-Hispanic or Latino | \n",
+ " not hispanic or latino | \n",
+ " 0.935 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Hispanic or Latino hispanic or latino 1.000\n",
+ "1 Not-Hispanic or Latino not hispanic or latino 0.935"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ethinicity_vmap = ethinicity_vmap[ethinicity_vmap[\"similarity\"] > 0.9]\n",
+ "ethinicity_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For `Tumor_Site`, given that this dataset is about endometrial cancer, all values must be mapped to \"Endometrium\". So instead of fixing each mapping individually, we will write a custom function that returns \"Endometrium\" regardless of the input value. Later, we will show how to use this function to transform the dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Anterior endometrium | \n",
+ " Endometrium | \n",
+ " 0.852 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Posterior endometrium | \n",
+ " Endometrium | \n",
+ " 0.823 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Other, specify | \n",
+ " Other specified parts of pancreas | \n",
+ " 0.543 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " nan | \n",
+ " Anal canal | \n",
+ " 0.301 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Anterior endometrium Endometrium 0.852\n",
+ "1 Posterior endometrium Endometrium 0.823\n",
+ "2 Other, specify Other specified parts of pancreas 0.543\n",
+ "3 nan Anal canal 0.301"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Tumor_Site\", \"tissue_or_organ_of_origin\"), target=\"gdc\", method=\"tfidf\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Custom mapping function that will be used to map the values of the 'Tumor_Site' column\n",
+ "def map_tumor_site(source_value):\n",
+ " return \"Endometrium\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Combining custom user mappings with suggested mappings\n",
+ "\n",
+ "Before generating a final harmonized dataset, we can combine the automatically generated value mappings with the fixed mappings provided by the user. To do so, we use the `bdi.merge_mappings()` function, which takes a list of mappings (e.g., generated automatically) and a list of \"user-defined mapping overrides\" that will be combined with the first list of mappings and will take precedence whenever they conflict.\n",
+ "\n",
+ "In our example below, all mappings specified in the variable `user_mappings` will override the mappings in `value_mappings` generated by the `bdi.match_values()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from math import ceil\n",
+ "\n",
+ "user_mappings = [\n",
+ " {\n",
+ " # When no mapping is needed, specifying the source and target is enough\n",
+ " \"source\": \"BMI\",\n",
+ " \"target\": \"bmi\",\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"Tumor_Size_cm\",\n",
+ " \"target\": \"tumor_largest_dimension_diameter\",\n",
+ " },\n",
+ " {\n",
+ " # mapper can be a custom Python function\n",
+ " \"source\": \"Tumor_Site\",\n",
+ " \"target\": \"tissue_or_organ_of_origin\",\n",
+ " \"mapper\": map_tumor_site,\n",
+ " },\n",
+ " {\n",
+ " # Lambda functions can also be used as mappers\n",
+ " \"source\": \"Age\",\n",
+ " \"target\": \"days_to_birth\",\n",
+ " \"mapper\": lambda age: -age * 365.25,\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"Age\",\n",
+ " \"target\": \"age_at_diagnosis\",\n",
+ " \"mapper\": lambda age: float(\"nan\") if pd.isnull(age) else ceil(age*365.25),\n",
+ " },\n",
+ " {\n",
+ " # We can also use a data frame to specify value mappings using the `matches` attribute\n",
+ " \"source\": \"Histologic_type\",\n",
+ " \"target\": \"primary_diagnosis\",\n",
+ " \"matches\": hist_type_vmap\n",
+ " },\n",
+ " # For dataframes that contain the 'source' and 'target' columns as attributes,\n",
+ " # such as the ones returned by the match_values() function, we can directly\n",
+ " # use them as mappings\n",
+ " ethinicity_vmap,\n",
+ " race_vmap,\n",
+ "]\n",
+ "\n",
+ "\n",
+ "harmonization_spec = bdi.merge_mappings(value_mappings, user_mappings)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we generate the harmonized dataset, with the user-defined value mappings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bmi | \n",
+ " tumor_largest_dimension_diameter | \n",
+ " tissue_or_organ_of_origin | \n",
+ " days_to_birth | \n",
+ " age_at_diagnosis | \n",
+ " primary_diagnosis | \n",
+ " ethnicity | \n",
+ " race | \n",
+ " country_of_birth | \n",
+ " history_of_tumor_type | \n",
+ " irs_stage | \n",
+ " gender | \n",
+ " tumor_focality | \n",
+ " tumor_shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 38.88 | \n",
+ " 2.9 | \n",
+ " Endometrium | \n",
+ " -23376.00 | \n",
+ " 23376.0 | \n",
+ " Endometrioid carcinoma | \n",
+ " not hispanic or latino | \n",
+ " white | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " Dome | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 39.76 | \n",
+ " 3.5 | \n",
+ " Endometrium | \n",
+ " -21184.50 | \n",
+ " 21185.0 | \n",
+ " Endometrioid carcinoma | \n",
+ " not hispanic or latino | \n",
+ " white | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " Dome | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 51.19 | \n",
+ " 4.5 | \n",
+ " Endometrium | \n",
+ " -18262.50 | \n",
+ " 18263.0 | \n",
+ " Endometrioid carcinoma | \n",
+ " not hispanic or latino | \n",
+ " white | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Endometrium | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma, NOS | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Phenochromocytoma or Paraganglioma | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 32.69 | \n",
+ " 3.5 | \n",
+ " Endometrium | \n",
+ " -27393.75 | \n",
+ " 27394.0 | \n",
+ " Endometrioid carcinoma | \n",
+ " not hispanic or latino | \n",
+ " white | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " 29.40 | \n",
+ " 4.2 | \n",
+ " Endometrium | \n",
+ " -27393.75 | \n",
+ " 27394.0 | \n",
+ " Endometrioid carcinoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " 35.42 | \n",
+ " 1.5 | \n",
+ " Endometrium | \n",
+ " -27028.50 | \n",
+ " 27029.0 | \n",
+ " Endometrioid carcinoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " 24.32 | \n",
+ " 3.8 | \n",
+ " Endometrium | \n",
+ " -31046.25 | \n",
+ " 31047.0 | \n",
+ " Serous cystadenocarcinoma | \n",
+ " not hispanic or latino | \n",
+ " black or african american | \n",
+ " United States | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " 34.06 | \n",
+ " 5.0 | \n",
+ " Endometrium | \n",
+ " -25567.50 | \n",
+ " 25568.0 | \n",
+ " Serous cystadenocarcinoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " None | \n",
+ " female | \n",
+ " Unifocal | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Endometrium | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Serous cystadenocarcinoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bmi tumor_largest_dimension_diameter tissue_or_organ_of_origin \\\n",
+ "0 38.88 2.9 Endometrium \n",
+ "1 39.76 3.5 Endometrium \n",
+ "2 51.19 4.5 Endometrium \n",
+ "3 NaN NaN Endometrium \n",
+ "4 32.69 3.5 Endometrium \n",
+ ".. ... ... ... \n",
+ "99 29.40 4.2 Endometrium \n",
+ "100 35.42 1.5 Endometrium \n",
+ "101 24.32 3.8 Endometrium \n",
+ "102 34.06 5.0 Endometrium \n",
+ "103 NaN NaN Endometrium \n",
+ "\n",
+ " days_to_birth age_at_diagnosis primary_diagnosis \\\n",
+ "0 -23376.00 23376.0 Endometrioid carcinoma \n",
+ "1 -21184.50 21185.0 Endometrioid carcinoma \n",
+ "2 -18262.50 18263.0 Endometrioid carcinoma \n",
+ "3 NaN NaN Carcinosarcoma, NOS \n",
+ "4 -27393.75 27394.0 Endometrioid carcinoma \n",
+ ".. ... ... ... \n",
+ "99 -27393.75 27394.0 Endometrioid carcinoma \n",
+ "100 -27028.50 27029.0 Endometrioid carcinoma \n",
+ "101 -31046.25 31047.0 Serous cystadenocarcinoma \n",
+ "102 -25567.50 25568.0 Serous cystadenocarcinoma \n",
+ "103 NaN NaN Serous cystadenocarcinoma \n",
+ "\n",
+ " ethnicity race country_of_birth \\\n",
+ "0 not hispanic or latino white United States \n",
+ "1 not hispanic or latino white United States \n",
+ "2 not hispanic or latino white United States \n",
+ "3 NaN NaN NaN \n",
+ "4 not hispanic or latino white United States \n",
+ ".. ... ... ... \n",
+ "99 NaN NaN Ukraine \n",
+ "100 NaN NaN Ukraine \n",
+ "101 not hispanic or latino black or african american United States \n",
+ "102 NaN NaN Ukraine \n",
+ "103 NaN NaN Ukraine \n",
+ "\n",
+ " history_of_tumor_type irs_stage gender tumor_focality \\\n",
+ "0 None None female Unifocal \n",
+ "1 None None female Unifocal \n",
+ "2 None None female Unifocal \n",
+ "3 Phenochromocytoma or Paraganglioma None NaN NaN \n",
+ "4 None None female Unifocal \n",
+ ".. ... ... ... ... \n",
+ "99 None None female Unifocal \n",
+ "100 None None female Unifocal \n",
+ "101 None None female Unifocal \n",
+ "102 None None female Unifocal \n",
+ "103 None None NaN NaN \n",
+ "\n",
+ " tumor_shape \n",
+ "0 Dome \n",
+ "1 Dome \n",
+ "2 None \n",
+ "3 NaN \n",
+ "4 None \n",
+ ".. ... \n",
+ "99 None \n",
+ "100 None \n",
+ "101 None \n",
+ "102 None \n",
+ "103 None \n",
+ "\n",
+ "[104 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "harmonized_dataset = bdi.materialize_mapping(dataset, harmonization_spec)\n",
+ "harmonized_dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For comparison, here is what our original data looked like:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " BMI | \n",
+ " Tumor_Size_cm | \n",
+ " Tumor_Site | \n",
+ " Age | \n",
+ " Age | \n",
+ " Histologic_type | \n",
+ " Ethnicity | \n",
+ " Race | \n",
+ " Country | \n",
+ " Histologic_type | \n",
+ " FIGO_stage | \n",
+ " Gender | \n",
+ " Tumor_Focality | \n",
+ " Tumor_Site | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 38.88 | \n",
+ " 2.9 | \n",
+ " Anterior endometrium | \n",
+ " 64.0 | \n",
+ " 64.0 | \n",
+ " Endometrioid | \n",
+ " Not-Hispanic or Latino | \n",
+ " White | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Anterior endometrium | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 39.76 | \n",
+ " 3.5 | \n",
+ " Posterior endometrium | \n",
+ " 58.0 | \n",
+ " 58.0 | \n",
+ " Endometrioid | \n",
+ " Not-Hispanic or Latino | \n",
+ " White | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Posterior endometrium | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 51.19 | \n",
+ " 4.5 | \n",
+ " Other, specify | \n",
+ " 50.0 | \n",
+ " 50.0 | \n",
+ " Endometrioid | \n",
+ " Not-Hispanic or Latino | \n",
+ " White | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 32.69 | \n",
+ " 3.5 | \n",
+ " Other, specify | \n",
+ " 75.0 | \n",
+ " 75.0 | \n",
+ " Endometrioid | \n",
+ " Not-Hispanic or Latino | \n",
+ " White | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " 29.40 | \n",
+ " 4.2 | \n",
+ " Other, specify | \n",
+ " 75.0 | \n",
+ " 75.0 | \n",
+ " Endometrioid | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " 35.42 | \n",
+ " 1.5 | \n",
+ " Other, specify | \n",
+ " 74.0 | \n",
+ " 74.0 | \n",
+ " Endometrioid | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " II | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " 24.32 | \n",
+ " 3.8 | \n",
+ " Other, specify | \n",
+ " 85.0 | \n",
+ " 85.0 | \n",
+ " Serous | \n",
+ " Not-Hispanic or Latino | \n",
+ " Black or African American | \n",
+ " United States | \n",
+ " Serous | \n",
+ " II | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " 34.06 | \n",
+ " 5.0 | \n",
+ " Other, specify | \n",
+ " 70.0 | \n",
+ " 70.0 | \n",
+ " Serous | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " IA | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " Other, specify | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " BMI Tumor_Size_cm Tumor_Site Age Age Histologic_type \\\n",
+ "0 38.88 2.9 Anterior endometrium 64.0 64.0 Endometrioid \n",
+ "1 39.76 3.5 Posterior endometrium 58.0 58.0 Endometrioid \n",
+ "2 51.19 4.5 Other, specify 50.0 50.0 Endometrioid \n",
+ "3 NaN NaN NaN NaN NaN Carcinosarcoma \n",
+ "4 32.69 3.5 Other, specify 75.0 75.0 Endometrioid \n",
+ ".. ... ... ... ... ... ... \n",
+ "99 29.40 4.2 Other, specify 75.0 75.0 Endometrioid \n",
+ "100 35.42 1.5 Other, specify 74.0 74.0 Endometrioid \n",
+ "101 24.32 3.8 Other, specify 85.0 85.0 Serous \n",
+ "102 34.06 5.0 Other, specify 70.0 70.0 Serous \n",
+ "103 NaN NaN NaN NaN NaN Serous \n",
+ "\n",
+ " Ethnicity Race Country \\\n",
+ "0 Not-Hispanic or Latino White United States \n",
+ "1 Not-Hispanic or Latino White United States \n",
+ "2 Not-Hispanic or Latino White United States \n",
+ "3 NaN NaN NaN \n",
+ "4 Not-Hispanic or Latino White United States \n",
+ ".. ... ... ... \n",
+ "99 NaN NaN Ukraine \n",
+ "100 NaN NaN Ukraine \n",
+ "101 Not-Hispanic or Latino Black or African American United States \n",
+ "102 NaN NaN Ukraine \n",
+ "103 NaN NaN Ukraine \n",
+ "\n",
+ " Histologic_type FIGO_stage Gender Tumor_Focality Tumor_Site \n",
+ "0 Endometrioid IA Female Unifocal Anterior endometrium \n",
+ "1 Endometrioid IA Female Unifocal Posterior endometrium \n",
+ "2 Endometrioid IA Female Unifocal Other, specify \n",
+ "3 Carcinosarcoma NaN NaN NaN NaN \n",
+ "4 Endometrioid IA Female Unifocal Other, specify \n",
+ ".. ... ... ... ... ... \n",
+ "99 Endometrioid IA Female Unifocal Other, specify \n",
+ "100 Endometrioid II Female Unifocal Other, specify \n",
+ "101 Serous II Female Unifocal Other, specify \n",
+ "102 Serous IA Female Unifocal Other, specify \n",
+ "103 Serous NaN NaN NaN NaN \n",
+ "\n",
+ "[104 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "original_columns = map(lambda m: m[\"source\"], harmonization_spec)\n",
+ "dataset[original_columns]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/getting-started-huang-gdc.ipynb b/examples/getting-started-huang-gdc.ipynb
new file mode 100644
index 00000000..b03ae43a
--- /dev/null
+++ b/examples/getting-started-huang-gdc.ipynb
@@ -0,0 +1,5224 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Getting Started"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, import the `bdikit` library."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import bdikit as bdi\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this example, we are mapping data from Dou et al. (https://pubmed.ncbi.nlm.nih.gov/37567170/) to the GDC format."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " case_id | \n",
+ " age | \n",
+ " gender | \n",
+ " country | \n",
+ " smoking_history | \n",
+ " smoke_age_start | \n",
+ " smoke_age_stop | \n",
+ " num_smoke_per_day | \n",
+ " num_pack_years_sm | \n",
+ " smoking_second_hand | \n",
+ " ... | \n",
+ " tumor_pathology_review | \n",
+ " ESTIMATE_stromal_score | \n",
+ " ESTIMATE_immune_score | \n",
+ " CD3_IHC_count | \n",
+ " stemness_score | \n",
+ " mutation_count | \n",
+ " neoAntigen_count | \n",
+ " chr_instability_idx | \n",
+ " integrated_subtype | \n",
+ " transcriptomic_subtype | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " C3L-00977 | \n",
+ " 56 | \n",
+ " Male | \n",
+ " Russia | \n",
+ " Current reformed smoker, years unknown | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " NaN | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 80% | \n",
+ " 6825.995755 | \n",
+ " 7989.115925 | \n",
+ " 1.0 | \n",
+ " 0.953243 | \n",
+ " 106 | \n",
+ " 0 | \n",
+ " 2.003654 | \n",
+ " Basal | \n",
+ " Mesenchymal | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " C3L-00987 | \n",
+ " 61 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 18 | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " 43.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 90%;SCC 90%;SCC 80%;SCC 70% | \n",
+ " 5999.793467 | \n",
+ " 4772.409716 | \n",
+ " 0.0 | \n",
+ " 0.825330 | \n",
+ " 83 | \n",
+ " 0 | \n",
+ " 5.205612 | \n",
+ " CIN | \n",
+ " Classical | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " C3L-00994 | \n",
+ " 50 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker within past 15 years | \n",
+ " 16 | \n",
+ " 50 | \n",
+ " 6 | \n",
+ " 10.2 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 40%;SCC 70%;SCC 40%;SCC 75% | \n",
+ " 8924.036564 | \n",
+ " 8176.233903 | \n",
+ " 5.0 | \n",
+ " 0.664581 | \n",
+ " 67 | \n",
+ " 0 | \n",
+ " 1.684475 | \n",
+ " Immune | \n",
+ " Mesenchymal | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " C3L-00995 | \n",
+ " 56 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker, more than 15 years | \n",
+ " 25 | \n",
+ " 56 | \n",
+ " 20 | \n",
+ " 31.0 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 90%;SCC 80%;SCC 40%;SCC 70% | \n",
+ " 8723.429667 | \n",
+ " 8342.246345 | \n",
+ " 70.0 | \n",
+ " 0.539918 | \n",
+ " 64 | \n",
+ " 0 | \n",
+ " 1.340483 | \n",
+ " Immune | \n",
+ " Mesenchymal | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " C3L-00997 | \n",
+ " 47 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker, more than 15 years | \n",
+ " 12 | \n",
+ " 27 | \n",
+ " 20 | \n",
+ " 15.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 70%;SCC 70% | \n",
+ " 7025.911695 | \n",
+ " 7445.251991 | \n",
+ " 60.0 | \n",
+ " 0.843765 | \n",
+ " 129 | \n",
+ " 0 | \n",
+ " 3.906370 | \n",
+ " CIN | \n",
+ " Classical | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " C3L-00999 | \n",
+ " 56 | \n",
+ " Male | \n",
+ " Russia | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 20 | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " 36.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 80%;SCC 70% | \n",
+ " 8510.704551 | \n",
+ " 8210.549555 | \n",
+ " 30.0 | \n",
+ " 0.548977 | \n",
+ " 159 | \n",
+ " 0 | \n",
+ " 1.148834 | \n",
+ " Immune | \n",
+ " Atypical | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " C3L-01138 | \n",
+ " 62 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 8 | \n",
+ " NaN | \n",
+ " 20 | \n",
+ " 54.0 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 70% | \n",
+ " 6283.423855 | \n",
+ " 6407.893478 | \n",
+ " 10.0 | \n",
+ " 0.890790 | \n",
+ " 187 | \n",
+ " 1 | \n",
+ " 3.924982 | \n",
+ " Basal | \n",
+ " Classical | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " C3L-01237 | \n",
+ " 57 | \n",
+ " Male | \n",
+ " Ukraine | \n",
+ " Current reformed smoker, more than 15 years | \n",
+ " 17 | \n",
+ " 20 | \n",
+ " 40 | \n",
+ " 6.0 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " ... | \n",
+ " SCC 80% | \n",
+ " 7175.667725 | \n",
+ " 5720.287055 | \n",
+ " 30.0 | \n",
+ " 0.738328 | \n",
+ " 141 | \n",
+ " 1 | \n",
+ " 4.286490 | \n",
+ " CIN | \n",
+ " Classical | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " C3L-02617 | \n",
+ " 64 | \n",
+ " Male | \n",
+ " Bulgaria | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 20 | \n",
+ " NaN | \n",
+ " 30 | \n",
+ " 66.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 90%;SCC 80%;SCC 80%;SCC 80%;SCC 80% | \n",
+ " 6346.815584 | \n",
+ " 6958.573390 | \n",
+ " NaN | \n",
+ " 0.906466 | \n",
+ " 470 | \n",
+ " 1 | \n",
+ " 4.744818 | \n",
+ " CIN | \n",
+ " Classical | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " C3L-02621 | \n",
+ " 68 | \n",
+ " Male | \n",
+ " Bulgaria | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " 38 | \n",
+ " NaN | \n",
+ " 30 | \n",
+ " 45.0 | \n",
+ " Yes | \n",
+ " ... | \n",
+ " SCC 70%;SCC 70%;SCC 70%;SCC 70%;SCC 65% | \n",
+ " 6806.384264 | \n",
+ " 8039.787184 | \n",
+ " NaN | \n",
+ " 0.754950 | \n",
+ " 358 | \n",
+ " 0 | \n",
+ " 1.774521 | \n",
+ " CIN | \n",
+ " Atypical | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10 rows × 37 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " case_id age gender country \\\n",
+ "0 C3L-00977 56 Male Russia \n",
+ "1 C3L-00987 61 Male Ukraine \n",
+ "2 C3L-00994 50 Male Ukraine \n",
+ "3 C3L-00995 56 Male Ukraine \n",
+ "4 C3L-00997 47 Male Ukraine \n",
+ "5 C3L-00999 56 Male Russia \n",
+ "6 C3L-01138 62 Male Ukraine \n",
+ "7 C3L-01237 57 Male Ukraine \n",
+ "8 C3L-02617 64 Male Bulgaria \n",
+ "9 C3L-02621 68 Male Bulgaria \n",
+ "\n",
+ " smoking_history smoke_age_start \\\n",
+ "0 Current reformed smoker, years unknown NaN \n",
+ "1 Current smoker: Includes daily and non-daily s... 18 \n",
+ "2 Current reformed smoker within past 15 years 16 \n",
+ "3 Current reformed smoker, more than 15 years 25 \n",
+ "4 Current reformed smoker, more than 15 years 12 \n",
+ "5 Current smoker: Includes daily and non-daily s... 20 \n",
+ "6 Current smoker: Includes daily and non-daily s... 8 \n",
+ "7 Current reformed smoker, more than 15 years 17 \n",
+ "8 Current smoker: Includes daily and non-daily s... 20 \n",
+ "9 Current smoker: Includes daily and non-daily s... 38 \n",
+ "\n",
+ " smoke_age_stop num_smoke_per_day num_pack_years_sm \\\n",
+ "0 NaN 20 NaN \n",
+ "1 NaN 20 43.0 \n",
+ "2 50 6 10.2 \n",
+ "3 56 20 31.0 \n",
+ "4 27 20 15.0 \n",
+ "5 NaN 20 36.0 \n",
+ "6 NaN 20 54.0 \n",
+ "7 20 40 6.0 \n",
+ "8 NaN 30 66.0 \n",
+ "9 NaN 30 45.0 \n",
+ "\n",
+ " smoking_second_hand ... \\\n",
+ "0 Yes ... \n",
+ "1 Yes ... \n",
+ "2 Exposure to secondhand smoke history not avail... ... \n",
+ "3 Exposure to secondhand smoke history not avail... ... \n",
+ "4 Yes ... \n",
+ "5 Yes ... \n",
+ "6 Exposure to secondhand smoke history not avail... ... \n",
+ "7 Exposure to secondhand smoke history not avail... ... \n",
+ "8 Yes ... \n",
+ "9 Yes ... \n",
+ "\n",
+ " tumor_pathology_review ESTIMATE_stromal_score \\\n",
+ "0 SCC 80% 6825.995755 \n",
+ "1 SCC 90%;SCC 90%;SCC 80%;SCC 70% 5999.793467 \n",
+ "2 SCC 40%;SCC 70%;SCC 40%;SCC 75% 8924.036564 \n",
+ "3 SCC 90%;SCC 80%;SCC 40%;SCC 70% 8723.429667 \n",
+ "4 SCC 70%;SCC 70% 7025.911695 \n",
+ "5 SCC 80%;SCC 70% 8510.704551 \n",
+ "6 SCC 70% 6283.423855 \n",
+ "7 SCC 80% 7175.667725 \n",
+ "8 SCC 90%;SCC 80%;SCC 80%;SCC 80%;SCC 80% 6346.815584 \n",
+ "9 SCC 70%;SCC 70%;SCC 70%;SCC 70%;SCC 65% 6806.384264 \n",
+ "\n",
+ " ESTIMATE_immune_score CD3_IHC_count stemness_score mutation_count \\\n",
+ "0 7989.115925 1.0 0.953243 106 \n",
+ "1 4772.409716 0.0 0.825330 83 \n",
+ "2 8176.233903 5.0 0.664581 67 \n",
+ "3 8342.246345 70.0 0.539918 64 \n",
+ "4 7445.251991 60.0 0.843765 129 \n",
+ "5 8210.549555 30.0 0.548977 159 \n",
+ "6 6407.893478 10.0 0.890790 187 \n",
+ "7 5720.287055 30.0 0.738328 141 \n",
+ "8 6958.573390 NaN 0.906466 470 \n",
+ "9 8039.787184 NaN 0.754950 358 \n",
+ "\n",
+ " neoAntigen_count chr_instability_idx integrated_subtype \\\n",
+ "0 0 2.003654 Basal \n",
+ "1 0 5.205612 CIN \n",
+ "2 0 1.684475 Immune \n",
+ "3 0 1.340483 Immune \n",
+ "4 0 3.906370 CIN \n",
+ "5 0 1.148834 Immune \n",
+ "6 1 3.924982 Basal \n",
+ "7 1 4.286490 CIN \n",
+ "8 1 4.744818 CIN \n",
+ "9 0 1.774521 CIN \n",
+ "\n",
+ " transcriptomic_subtype \n",
+ "0 Mesenchymal \n",
+ "1 Classical \n",
+ "2 Mesenchymal \n",
+ "3 Mesenchymal \n",
+ "4 Classical \n",
+ "5 Atypical \n",
+ "6 Classical \n",
+ "7 Classical \n",
+ "8 Classical \n",
+ "9 Atypical \n",
+ "\n",
+ "[10 rows x 37 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset = pd.read_csv(\"./datasets/Huang.csv\")\n",
+ "\n",
+ "# columns = [\n",
+ "# \"Country\",\n",
+ "# \"Histologic_type\",\n",
+ "# \"FIGO_stage\",\n",
+ "# \"BMI\",\n",
+ "# \"Age\",\n",
+ "# \"Race\",\n",
+ "# \"Ethnicity\",\n",
+ "# \"Gender\",\n",
+ "# \"Tumor_Focality\",\n",
+ "# \"Tumor_Size_cm\",\n",
+ "# ]\n",
+ "\n",
+ "# dataset[columns].head(10)\n",
+ "dataset.head(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Matching the table schema to GDC standard vocabulary\n",
+ "\n",
+ "`bdi-kit` offers a suite of functions to help with data harmonization tasks.\n",
+ "For instance, it can help with automatic discovery of one-to-one mappings between the columns in the input (source) dataset and a target dataset schema. The target schema can be either another table or a standard data vocabulary such as the GDC (Genomic Data Commons).\n",
+ "\n",
+ "To achieve this using `bdi-kit`, we can use the `match_schema()` function to match columns to the GDC vocabulary schema as follows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 37 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ac73cc03d9f24fbba8dec5864b71faab",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/37 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features loaded for 734 columns\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " case_id | \n",
+ " catalog_reference | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " age | \n",
+ " age_at_onset | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " gender | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " country | \n",
+ " country_of_birth | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " smoking_history | \n",
+ " tobacco_smoking_status | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " smoke_age_start | \n",
+ " years_smoked | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " smoke_age_stop | \n",
+ " years_smoked | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " num_smoke_per_day | \n",
+ " years_smoked | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " num_pack_years_sm | \n",
+ " pack_years_smoked | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " smoking_second_hand | \n",
+ " relative_smoker | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " smoking_inferred | \n",
+ " relative_smoker | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " alcohol_consum | \n",
+ " alcohol_intensity | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " num_yrs_alc_con | \n",
+ " undescended_testis_corrected_age | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " tumor_site_original | \n",
+ " primary_site | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " tumor_site_curated | \n",
+ " tumor_depth_descriptor | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " tumor_focality | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " tumor_size_cm | \n",
+ " tumor_width_measurement | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " histologic_type | \n",
+ " roots | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " histologic_grade | \n",
+ " who_nte_grade | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " tumor_necrosis | \n",
+ " necrosis_present | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " patho_staging_pt | \n",
+ " ensat_pathologic_t | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " patho_staging_pn | \n",
+ " ajcc_pathologic_n | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " clinic_staging_dist_metas | \n",
+ " uicc_clinical_m | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " tumor_stage | \n",
+ " ensat_pathologic_stage | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " P16 | \n",
+ " scan_tracer_used | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " HPV_inference | \n",
+ " vascular_invasion_present | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " NAT_pathology_review | \n",
+ " staining_intensity_scale | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " tumor_pathology_review | \n",
+ " best_overall_response | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " ESTIMATE_stromal_score | \n",
+ " transcript | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " ESTIMATE_immune_score | \n",
+ " transcript | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " CD3_IHC_count | \n",
+ " cd4_count | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " stemness_score | \n",
+ " transcript | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " mutation_count | \n",
+ " mutation_codon | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " neoAntigen_count | \n",
+ " ensat_pathologic_n | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " chr_instability_idx | \n",
+ " transcript | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " integrated_subtype | \n",
+ " roots | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " transcriptomic_subtype | \n",
+ " fab_morphology_code | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 case_id catalog_reference\n",
+ "1 age age_at_onset\n",
+ "2 gender gender\n",
+ "3 country country_of_birth\n",
+ "4 smoking_history tobacco_smoking_status\n",
+ "5 smoke_age_start years_smoked\n",
+ "6 smoke_age_stop years_smoked\n",
+ "7 num_smoke_per_day years_smoked\n",
+ "8 num_pack_years_sm pack_years_smoked\n",
+ "9 smoking_second_hand relative_smoker\n",
+ "10 smoking_inferred relative_smoker\n",
+ "11 alcohol_consum alcohol_intensity\n",
+ "12 num_yrs_alc_con undescended_testis_corrected_age\n",
+ "13 tumor_site_original primary_site\n",
+ "14 tumor_site_curated tumor_depth_descriptor\n",
+ "15 tumor_focality tumor_focality\n",
+ "16 tumor_size_cm tumor_width_measurement\n",
+ "17 histologic_type roots\n",
+ "18 histologic_grade who_nte_grade\n",
+ "19 tumor_necrosis necrosis_present\n",
+ "20 patho_staging_pt ensat_pathologic_t\n",
+ "21 patho_staging_pn ajcc_pathologic_n\n",
+ "22 clinic_staging_dist_metas uicc_clinical_m\n",
+ "23 tumor_stage ensat_pathologic_stage\n",
+ "24 P16 scan_tracer_used\n",
+ "25 HPV_inference vascular_invasion_present\n",
+ "26 NAT_pathology_review staining_intensity_scale\n",
+ "27 tumor_pathology_review best_overall_response\n",
+ "28 ESTIMATE_stromal_score transcript\n",
+ "29 ESTIMATE_immune_score transcript\n",
+ "30 CD3_IHC_count cd4_count\n",
+ "31 stemness_score transcript\n",
+ "32 mutation_count mutation_codon\n",
+ "33 neoAntigen_count ensat_pathologic_n\n",
+ "34 chr_instability_idx transcript\n",
+ "35 integrated_subtype roots\n",
+ "36 transcriptomic_subtype fab_morphology_code"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "column_mappings = bdi.match_schema(dataset, target=\"gdc\", method=\"ct_learning\")\n",
+ "column_mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generating a harmonized table\n",
+ "\n",
+ "After discovering a schema mapping, we can generate a new table (DataFrame) using the new column names from the GDC standard vocabulary.\n",
+ "\n",
+ "To do so using `bdi-kit`, we can use the function `materialize_mapping()` as follows. Note that the column headers have been renamed to the target schema."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " country_of_birth | \n",
+ " dysplasia_type | \n",
+ " figo_stage | \n",
+ " hpv_positive_type | \n",
+ " weight | \n",
+ " race | \n",
+ " ethnicity | \n",
+ " gender | \n",
+ " tumor_focality | \n",
+ " tumor_depth | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 38.88 | \n",
+ " 64.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 2.9 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 39.76 | \n",
+ " 58.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 51.19 | \n",
+ " 50.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 32.69 | \n",
+ " 75.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " 29.40 | \n",
+ " 75.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 4.2 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " II | \n",
+ " 35.42 | \n",
+ " 74.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 1.5 | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " United States | \n",
+ " Serous | \n",
+ " II | \n",
+ " 24.32 | \n",
+ " 85.0 | \n",
+ " Black or African American | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 3.8 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " IA | \n",
+ " 34.06 | \n",
+ " 70.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " country_of_birth dysplasia_type figo_stage hpv_positive_type weight \\\n",
+ "0 United States Endometrioid IA 38.88 64.0 \n",
+ "1 United States Endometrioid IA 39.76 58.0 \n",
+ "2 United States Endometrioid IA 51.19 50.0 \n",
+ "3 NaN Carcinosarcoma NaN NaN NaN \n",
+ "4 United States Endometrioid IA 32.69 75.0 \n",
+ ".. ... ... ... ... ... \n",
+ "99 Ukraine Endometrioid IA 29.40 75.0 \n",
+ "100 Ukraine Endometrioid II 35.42 74.0 \n",
+ "101 United States Serous II 24.32 85.0 \n",
+ "102 Ukraine Serous IA 34.06 70.0 \n",
+ "103 Ukraine Serous NaN NaN NaN \n",
+ "\n",
+ " race ethnicity gender tumor_focality \\\n",
+ "0 White Not-Hispanic or Latino Female Unifocal \n",
+ "1 White Not-Hispanic or Latino Female Unifocal \n",
+ "2 White Not-Hispanic or Latino Female Unifocal \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 White Not-Hispanic or Latino Female Unifocal \n",
+ ".. ... ... ... ... \n",
+ "99 NaN NaN Female Unifocal \n",
+ "100 NaN NaN Female Unifocal \n",
+ "101 Black or African American Not-Hispanic or Latino Female Unifocal \n",
+ "102 NaN NaN Female Unifocal \n",
+ "103 NaN NaN NaN NaN \n",
+ "\n",
+ " tumor_depth \n",
+ "0 2.9 \n",
+ "1 3.5 \n",
+ "2 4.5 \n",
+ "3 NaN \n",
+ "4 3.5 \n",
+ ".. ... \n",
+ "99 4.2 \n",
+ "100 1.5 \n",
+ "101 3.8 \n",
+ "102 5.0 \n",
+ "103 NaN \n",
+ "\n",
+ "[104 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.materialize_mapping(dataset, column_mappings)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generating a harmonized table with value mappings\n",
+ "\n",
+ "`bdi-kit` can also help with translation of the values from the source table to the target standard format.\n",
+ "\n",
+ "To this end, `bdi-kit` provides the function `match_values()` that automatically creates value mappings for each string column.\n",
+ "The output of `match_values()` can be fed to `materialize_mapping()`, which materializes the final target using both schema and value mappings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " country_of_birth | \n",
+ " dysplasia_type | \n",
+ " figo_stage | \n",
+ " race | \n",
+ " ethnicity | \n",
+ " gender | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " None | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " None | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " None | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " Esophageal Mucosa Columnar Dysplasia | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " None | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " Stage IA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " Stage III | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " United States | \n",
+ " None | \n",
+ " Stage III | \n",
+ " black or african american | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " Stage IA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " Ukraine | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " country_of_birth dysplasia_type figo_stage \\\n",
+ "0 United States None Stage IA \n",
+ "1 United States None Stage IA \n",
+ "2 United States None Stage IA \n",
+ "3 NaN Esophageal Mucosa Columnar Dysplasia NaN \n",
+ "4 United States None Stage IA \n",
+ ".. ... ... ... \n",
+ "99 Ukraine None Stage IA \n",
+ "100 Ukraine None Stage III \n",
+ "101 United States None Stage III \n",
+ "102 Ukraine None Stage IA \n",
+ "103 Ukraine None NaN \n",
+ "\n",
+ " race ethnicity gender tumor_focality \n",
+ "0 white not hispanic or latino female Unifocal \n",
+ "1 white not hispanic or latino female Unifocal \n",
+ "2 white not hispanic or latino female Unifocal \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 white not hispanic or latino female Unifocal \n",
+ ".. ... ... ... ... \n",
+ "99 NaN NaN female Unifocal \n",
+ "100 NaN NaN female Unifocal \n",
+ "101 black or african american not hispanic or latino female Unifocal \n",
+ "102 NaN NaN female Unifocal \n",
+ "103 NaN NaN NaN NaN \n",
+ "\n",
+ "[104 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "value_mappings = bdi.match_values(dataset, column_mapping=column_mappings, target=\"gdc\", method=\"tfidf\")\n",
+ "bdi.materialize_mapping(dataset, value_mappings)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Verifying the schema mappings\n",
+ "\n",
+ "Sometimes the mappings generated automatically may be incorrect or you may want to verify them individually.\n",
+ "To verify the suggested column mappings, `bdi-kit` offers additional APIs to visualize the data and make any modifications when necessary. \n",
+ "\n",
+ "For this example, we will use the column `Histologic_type`. We can start by exploring the columns most similar to `Histologic_type`. \n",
+ "\n",
+ "For this, we can use the `top_matches()` function. Here, we notice that `primary_diagnosis` could be a potential target column.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "100%|██████████| 1/1 [00:00<00:00, 14.70it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 1 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 734/734 [00:58<00:00, 12.48it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 734 columns\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Histologic_type | \n",
+ " described_cases | \n",
+ " 0.589956 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Histologic_type | \n",
+ " slide_images | \n",
+ " 0.587552 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Histologic_type | \n",
+ " history_of_tumor_type | \n",
+ " 0.574640 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Histologic_type | \n",
+ " primary_diagnosis | \n",
+ " 0.573583 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Histologic_type | \n",
+ " additional_pathology_findings | \n",
+ " 0.562278 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Histologic_type | \n",
+ " pathology_details | \n",
+ " 0.562007 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Histologic_type | \n",
+ " pathology_reports | \n",
+ " 0.547307 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Histologic_type | \n",
+ " relationship_primary_diagnosis | \n",
+ " 0.524285 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Histologic_type | \n",
+ " diagnoses | \n",
+ " 0.519854 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Histologic_type | \n",
+ " family_histories | \n",
+ " 0.516649 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Histologic_type described_cases 0.589956\n",
+ "1 Histologic_type slide_images 0.587552\n",
+ "2 Histologic_type history_of_tumor_type 0.574640\n",
+ "3 Histologic_type primary_diagnosis 0.573583\n",
+ "4 Histologic_type additional_pathology_findings 0.562278\n",
+ "5 Histologic_type pathology_details 0.562007\n",
+ "6 Histologic_type pathology_reports 0.547307\n",
+ "7 Histologic_type relationship_primary_diagnosis 0.524285\n",
+ "8 Histologic_type diagnoses 0.519854\n",
+ "9 Histologic_type family_histories 0.516649"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hist_type_matches = bdi.top_matches(dataset, columns=[\"Histologic_type\"], target=\"gdc\")\n",
+ "hist_type_matches"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Viewing the column domains\n",
+ "\n",
+ "To verify that `primary_diagnosis` is a good target column, we view and compare the domains of each column using the `preview_domain()` function. For the source table, it returns the list of unique values in the source column. For the GDC target, it returns the list of unique valid values that a column can have.\n",
+ "\n",
+ "Here we see that the values seem to be related."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrioid | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Carcinosarcoma | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Serous | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Clear cell | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name\n",
+ "0 Endometrioid\n",
+ "1 Carcinosarcoma\n",
+ "2 Serous\n",
+ "3 Clear cell"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(dataset, \"Histologic_type\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ " value_description | \n",
+ " column_description | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Abdominal desmoid | \n",
+ " An insidious poorly circumscribed neoplasm ari... | \n",
+ " Text term used to describe the patient's histo... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Abdominal fibromatosis | \n",
+ " An insidious poorly circumscribed neoplasm ari... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Achromic nevus | \n",
+ " A benign nevus characterized by the absence of... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Acidophil adenocarcinoma | \n",
+ " A malignant epithelial neoplasm of the anterio... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Acidophil adenoma | \n",
+ " An epithelial neoplasm of the anterior pituita... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2620 | \n",
+ " Wolffian duct tumor | \n",
+ " An epithelial neoplasm of the female reproduct... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2621 | \n",
+ " Xanthofibroma | \n",
+ " A benign neoplasm composed of fibroblastic spi... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2622 | \n",
+ " Yolk sac tumor | \n",
+ " A non-seminomatous malignant germ cell tumor c... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2623 | \n",
+ " Unknown | \n",
+ " Not known, not observed, not recorded, or refu... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2624 | \n",
+ " Not Reported | \n",
+ " Not provided or available. | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2625 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name \\\n",
+ "0 Abdominal desmoid \n",
+ "1 Abdominal fibromatosis \n",
+ "2 Achromic nevus \n",
+ "3 Acidophil adenocarcinoma \n",
+ "4 Acidophil adenoma \n",
+ "... ... \n",
+ "2620 Wolffian duct tumor \n",
+ "2621 Xanthofibroma \n",
+ "2622 Yolk sac tumor \n",
+ "2623 Unknown \n",
+ "2624 Not Reported \n",
+ "\n",
+ " value_description \\\n",
+ "0 An insidious poorly circumscribed neoplasm ari... \n",
+ "1 An insidious poorly circumscribed neoplasm ari... \n",
+ "2 A benign nevus characterized by the absence of... \n",
+ "3 A malignant epithelial neoplasm of the anterio... \n",
+ "4 An epithelial neoplasm of the anterior pituita... \n",
+ "... ... \n",
+ "2620 An epithelial neoplasm of the female reproduct... \n",
+ "2621 A benign neoplasm composed of fibroblastic spi... \n",
+ "2622 A non-seminomatous malignant germ cell tumor c... \n",
+ "2623 Not known, not observed, not recorded, or refu... \n",
+ "2624 Not provided or available. \n",
+ "\n",
+ " column_description \n",
+ "0 Text term used to describe the patient's histo... \n",
+ "1 \n",
+ "2 \n",
+ "3 \n",
+ "4 \n",
+ "... ... \n",
+ "2620 \n",
+ "2621 \n",
+ "2622 \n",
+ "2623 \n",
+ "2624 \n",
+ "\n",
+ "[2625 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(\"gdc\", \"primary_diagnosis\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since `primary_diagnosis` looks like a correct match for `Histologic_type`, we can modify the `column_mappings` variable directly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Country | \n",
+ " country_of_birth | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Histologic_type | \n",
+ " primary_diagnosis | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " FIGO_stage | \n",
+ " figo_stage | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BMI | \n",
+ " hpv_positive_type | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Age | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Race | \n",
+ " race | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Ethnicity | \n",
+ " ethnicity | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Gender | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Tumor_Focality | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Tumor_Size_cm | \n",
+ " tumor_depth | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 Country country_of_birth\n",
+ "1 Histologic_type primary_diagnosis\n",
+ "2 FIGO_stage figo_stage\n",
+ "3 BMI hpv_positive_type\n",
+ "4 Age weight\n",
+ "5 Race race\n",
+ "6 Ethnicity ethnicity\n",
+ "7 Gender gender\n",
+ "8 Tumor_Focality tumor_focality\n",
+ "9 Tumor_Size_cm tumor_depth"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "column_mappings.loc[column_mappings[\"source\"] == \"Histologic_type\", \"target\"] = \"primary_diagnosis\"\n",
+ "column_mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Finding correct value mappings\n",
+ "\n",
+ "After finding the correct column, we need to find appropriate value mappings. \n",
+ "Using `match_values()`, we can inspect what the possible value mappings for this would look like after the harmonization.\n",
+ "\n",
+ "`bdi-kit` implements multiple methods for value mapping discovery, including:\n",
+ "\n",
+ " - `edit_distance` - Computes value similarities using Levenshtein's edit distance measure.\n",
+ " - `tfidf` - A method based on tf-idf importance weighting computed over character n-grams.\n",
+ " - `embedding` - Uses BERT word embeddings to compute \"semantic similarity\" between the values.\n",
+ "\n",
+ "To specify a value mapping approach, we can pass the `method` parameter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ " 0.848485 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Clear cell | \n",
+ " Clear cell adenoma | \n",
+ " 0.714286 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " Stromal endometriosis | \n",
+ " 0.666667 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Neuronevus | \n",
+ " 0.625000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS 0.848485\n",
+ "1 Clear cell Clear cell adenoma 0.714286\n",
+ "2 Endometrioid Stromal endometriosis 0.666667\n",
+ "3 Serous Neuronevus 0.625000"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"edit_distance\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ " 0.969 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " 0.897 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Clear cell adenoma | \n",
+ " 0.853 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous carcinoma, NOS | \n",
+ " 0.755 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS 0.969\n",
+ "1 Endometrioid Endometrioid adenoma, NOS 0.897\n",
+ "2 Clear cell Clear cell adenoma 0.853\n",
+ "3 Serous Serous carcinoma, NOS 0.755"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"tfidf\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinofibroma | \n",
+ " 0.919 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrioid | \n",
+ " Endometrioid cystadenocarcinoma | \n",
+ " 0.810 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Clear cell | \n",
+ " Clear cell carcinoma | \n",
+ " 0.760 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous cystoma | \n",
+ " 0.661 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Carcinosarcoma Carcinofibroma 0.919\n",
+ "1 Endometrioid Endometrioid cystadenocarcinoma 0.810\n",
+ "2 Clear cell Clear cell carcinoma 0.760\n",
+ "3 Serous Serous cystoma 0.661"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"embedding\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Carcinosarcoma | \n",
+ " Carcinosarcoma, NOS | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Clear cell | \n",
+ " Clear cell adenocarcinoma, NOS | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrioid | \n",
+ " Endometrioid carcinoma | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Serous | \n",
+ " Serous cystadenocarcinoma | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 Carcinosarcoma Carcinosarcoma, NOS\n",
+ "1 Clear cell Clear cell adenocarcinoma, NOS\n",
+ "2 Endometrioid Endometrioid carcinoma\n",
+ "3 Serous Serous cystadenocarcinoma"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hist_type_vmap = pd.DataFrame(\n",
+ " columns=[\"source\", \"target\"],\n",
+ " data=[\n",
+ " (\"Carcinosarcoma\", \"Carcinosarcoma, NOS\"),\n",
+ " (\"Clear cell\", \"Clear cell adenocarcinoma, NOS\"),\n",
+ " (\"Endometrioid\", \"Endometrioid carcinoma\"),\n",
+ " (\"Serous\", \"Serous cystadenocarcinoma\"),\n",
+ " ],\n",
+ ")\n",
+ "hist_type_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Verifying multiple value mappings at once\n",
+ "\n",
+ "Besides verifying value mappings individually, you can also do it for all column mappings at once."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gender => gender\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Male | \n",
+ " male | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Female | \n",
+ " female | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Male male 1.0\n",
+ "1 Female female 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "country => country_of_birth\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Russia | \n",
+ " Russia | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Ukraine | \n",
+ " Ukraine | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Bulgaria | \n",
+ " Bulgaria | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Poland | \n",
+ " Poland | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " United States | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " China | \n",
+ " China | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Armenia | \n",
+ " Armenia | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " TSS did not collect | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Russia Russia 1.0\n",
+ "1 Ukraine Ukraine 1.0\n",
+ "2 Bulgaria Bulgaria 1.0\n",
+ "3 Poland Poland 1.0\n",
+ "4 United States United States 1.0\n",
+ "5 China China 1.0\n",
+ "6 Armenia Armenia 1.0\n",
+ "7 TSS did not collect None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "smoking_history => tobacco_smoking_status\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Current reformed smoker, more than 15 years | \n",
+ " Current Reformed Smoker for > 15 yrs | \n",
+ " 0.823 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Current reformed smoker, years unknown | \n",
+ " Current Reformed Smoker for > 15 yrs | \n",
+ " 0.756 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Current reformed smoker within past 15 years | \n",
+ " Current Reformed Smoker for > 15 yrs | \n",
+ " 0.719 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Lifelong non-smoker: Less than 100 cigarettes ... | \n",
+ " Lifelong Non-Smoker | \n",
+ " 0.697 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Smoking history not available | \n",
+ " Smoking history not documented | \n",
+ " 0.564 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Current smoker: Includes daily and non-daily s... | \n",
+ " Current Smoker | \n",
+ " 0.506 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source \\\n",
+ "0 Current reformed smoker, more than 15 years \n",
+ "1 Current reformed smoker, years unknown \n",
+ "2 Current reformed smoker within past 15 years \n",
+ "3 Lifelong non-smoker: Less than 100 cigarettes ... \n",
+ "4 Smoking history not available \n",
+ "5 Current smoker: Includes daily and non-daily s... \n",
+ "\n",
+ " target similarity \n",
+ "0 Current Reformed Smoker for > 15 yrs 0.823 \n",
+ "1 Current Reformed Smoker for > 15 yrs 0.756 \n",
+ "2 Current Reformed Smoker for > 15 yrs 0.719 \n",
+ "3 Lifelong Non-Smoker 0.697 \n",
+ "4 Smoking history not documented 0.564 \n",
+ "5 Current Smoker 0.506 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "smoking_second_hand => relative_smoker\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " No or minimal exposure to secondhand smoke | \n",
+ " No | \n",
+ " 0.410 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Exposure to secondhand smoke history not avail... | \n",
+ " Not Reported | \n",
+ " 0.352 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Yes Yes 1.000\n",
+ "1 No or minimal exposure to secondhand smoke No 0.410\n",
+ "2 Exposure to secondhand smoke history not avail... Not Reported 0.352"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "smoking_inferred => relative_smoker\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " weak_evidence | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " strong_evidence | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 weak_evidence None None\n",
+ "1 strong_evidence None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "alcohol_consum => alcohol_intensity\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Lifelong non-drinker | \n",
+ " Lifelong Non-Drinker | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Consumed alcohol in the past, but currently a ... | \n",
+ " Non-Drinker | \n",
+ " 0.437 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Alcohol consumption more than 2 drinks per day... | \n",
+ " Non-Drinker | \n",
+ " 0.370 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Alcohol consumption equal to or less than 2 dr... | \n",
+ " Occasional Drinker | \n",
+ " 0.360 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Alcohol consumption history not available | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target \\\n",
+ "0 Lifelong non-drinker Lifelong Non-Drinker \n",
+ "1 Consumed alcohol in the past, but currently a ... Non-Drinker \n",
+ "2 Alcohol consumption more than 2 drinks per day... Non-Drinker \n",
+ "3 Alcohol consumption equal to or less than 2 dr... Occasional Drinker \n",
+ "4 Alcohol consumption history not available None \n",
+ "5 nan None \n",
+ "\n",
+ " similarity \n",
+ "0 1.000 \n",
+ "1 0.437 \n",
+ "2 0.370 \n",
+ "3 0.360 \n",
+ "4 NaN \n",
+ "5 NaN "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "tumor_site_original => primary_site\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Floor of mouth | \n",
+ " Floor of mouth | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Larynx | \n",
+ " Larynx | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Oropharynx | \n",
+ " Oropharynx | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Base of tongue | \n",
+ " Base of tongue | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Lip | \n",
+ " Lip | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Tonsil | \n",
+ " Tonsil | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Tongue | \n",
+ " Base of tongue | \n",
+ " 0.733 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Oral cavity | \n",
+ " Other and ill-defined sites in lip, oral cavit... | \n",
+ " 0.542 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Other | \n",
+ " Other and unspecified parts of mouth | \n",
+ " 0.428 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Alveolar ridge | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Buccal mucosa | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target \\\n",
+ "0 Floor of mouth Floor of mouth \n",
+ "1 Larynx Larynx \n",
+ "2 Oropharynx Oropharynx \n",
+ "3 Base of tongue Base of tongue \n",
+ "4 Lip Lip \n",
+ "5 Tonsil Tonsil \n",
+ "6 Tongue Base of tongue \n",
+ "7 Oral cavity Other and ill-defined sites in lip, oral cavit... \n",
+ "8 Other Other and unspecified parts of mouth \n",
+ "9 Alveolar ridge None \n",
+ "10 Buccal mucosa None \n",
+ "\n",
+ " similarity \n",
+ "0 1.000 \n",
+ "1 1.000 \n",
+ "2 1.000 \n",
+ "3 1.000 \n",
+ "4 1.000 \n",
+ "5 1.000 \n",
+ "6 0.733 \n",
+ "7 0.542 \n",
+ "8 0.428 \n",
+ "9 NaN \n",
+ "10 NaN "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "tumor_site_curated => tumor_depth_descriptor\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Lip | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Oropharynx | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Oral cavity | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Hypopharynx | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Larynx | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Lip None None\n",
+ "1 Oropharynx None None\n",
+ "2 Oral cavity None None\n",
+ "3 Hypopharynx None None\n",
+ "4 Larynx None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "tumor_focality => tumor_focality\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Unifocal | \n",
+ " Unifocal | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Multifocal | \n",
+ " Multifocal | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Unifocal Unifocal 1.0\n",
+ "1 Multifocal Multifocal 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "histologic_grade => who_nte_grade\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " G3 Poorly differentiated | \n",
+ " Not Reported | \n",
+ " 0.431 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " G2 Moderately differentiated | \n",
+ " Not Reported | \n",
+ " 0.358 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " G1 Well differentiated | \n",
+ " Not Reported | \n",
+ " 0.310 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 G3 Poorly differentiated Not Reported 0.431\n",
+ "1 G2 Moderately differentiated Not Reported 0.358\n",
+ "2 G1 Well differentiated Not Reported 0.310"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "tumor_necrosis => necrosis_present\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Not identified | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Present | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Not identified None None\n",
+ "1 Present None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "patho_staging_pt => ensat_pathologic_t\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " T2 | \n",
+ " T2 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " pT3 | \n",
+ " T3 | \n",
+ " 0.703 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pT1 | \n",
+ " T1 | \n",
+ " 0.693 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pT2 | \n",
+ " T2 | \n",
+ " 0.661 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pT4 | \n",
+ " T4 | \n",
+ " 0.658 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " T4a | \n",
+ " T4 | \n",
+ " 0.567 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pT4a | \n",
+ " T4 | \n",
+ " 0.476 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " pT1b | \n",
+ " T1 | \n",
+ " 0.475 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 T2 T2 1.000\n",
+ "1 pT3 T3 0.703\n",
+ "2 pT1 T1 0.693\n",
+ "3 pT2 T2 0.661\n",
+ "4 pT4 T4 0.658\n",
+ "5 T4a T4 0.567\n",
+ "6 pT4a T4 0.476\n",
+ "7 pT1b T1 0.475"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "patho_staging_pn => ajcc_pathologic_n\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " N3b | \n",
+ " N3b | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " N1 | \n",
+ " N1 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " pN2c | \n",
+ " N2c | \n",
+ " 0.828 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " pN3a | \n",
+ " N3a | \n",
+ " 0.828 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " pN2b | \n",
+ " N2b | \n",
+ " 0.819 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " pN3b | \n",
+ " N3b | \n",
+ " 0.806 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " pNx | \n",
+ " NX | \n",
+ " 0.706 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " pN2 | \n",
+ " N2 | \n",
+ " 0.617 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " pN3 | \n",
+ " N3 | \n",
+ " 0.617 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " pN0 | \n",
+ " N0 | \n",
+ " 0.611 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " PN1 | \n",
+ " N1 | \n",
+ " 0.534 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 N3b N3b 1.000\n",
+ "1 N1 N1 1.000\n",
+ "2 pN2c N2c 0.828\n",
+ "3 pN3a N3a 0.828\n",
+ "4 pN2b N2b 0.819\n",
+ "5 pN3b N3b 0.806\n",
+ "6 pNx NX 0.706\n",
+ "7 pN2 N2 0.617\n",
+ "8 pN3 N3 0.617\n",
+ "9 pN0 N0 0.611\n",
+ "10 PN1 N1 0.534"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "clinic_staging_dist_metas => uicc_clinical_m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " m0 | \n",
+ " M0 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " cM0 | \n",
+ " cM0 (i+) | \n",
+ " 0.891 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " cMX | \n",
+ " MX | \n",
+ " 0.674 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Staging Incomplete | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 m0 M0 1.000\n",
+ "1 cM0 cM0 (i+) 0.891\n",
+ "2 cMX MX 0.674\n",
+ "3 Staging Incomplete None NaN\n",
+ "4 nan None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "tumor_stage => ensat_pathologic_stage\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Stage III | \n",
+ " Stage III | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Stage II | \n",
+ " Stage II | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Stage IV | \n",
+ " Stage IV | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Stage I | \n",
+ " Stage I | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Stage III Stage III 1.0\n",
+ "1 Stage II Stage II 1.0\n",
+ "2 Stage IV Stage IV 1.0\n",
+ "3 Stage I Stage I 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "P16 => scan_tracer_used\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Not Evaluated | \n",
+ " Acetate | \n",
+ " 0.326 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Positive (>70% nuclear and cytoplasmic staining) | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Unknown | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Negative | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Not Evaluated Acetate 0.326\n",
+ "1 Positive (>70% nuclear and cytoplasmic staining) None NaN\n",
+ "2 Unknown None NaN\n",
+ "3 Negative None NaN\n",
+ "4 nan None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "HPV_inference => vascular_invasion_present\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NO | \n",
+ " No | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " YES | \n",
+ " Yes | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 NO No 1.0\n",
+ "1 YES Yes 1.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "NAT_pathology_review => staining_intensity_scale\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Normal 60%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Normal 15%;Normal 10%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Normal 35%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Normal Cartilage 0%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Normal 25%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Soft Tissue No EPI% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Normal 20%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Muscle 0%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Normal 50%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Normal 40%;Normal 20%;Normal 15%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " N 35%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " N 5%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Normal EPI 10%;Normal EPI 10%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Normal 70$ EPI% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Normal Thyroid;Normal Thyroid;Normal No EPI% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Normal 40%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Normal 5%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Normal 1%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Normal 20%;SCC 75%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " SCC 30%;SCC 20%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " N 3%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " N 25%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Normal Cartilage% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Normal 0% EPI% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " N 15%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Soft Tissue No EPI;Normal No EPI% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Normal 20%;Normal No EPI;Normal 5%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " N 0%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " N 20%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Normal 20% EPI% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Normal 15%;Normal 5%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Normal 30%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " Normal 10%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " Hyperplasia 70%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " N lymphoid B% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " Normal no EPI% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " SCC 75%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " Normal 10%;Normal 15%;SCC 90%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " Normal 0%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " N 10%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " Normal 15%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " N 30%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " SCC 70%;Normal 10%;Normal 30%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " Normal 10%;Normal 10%;Normal 10%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " SCC 70%% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " Normal% | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Normal 60%% None None\n",
+ "1 Normal 15%;Normal 10%% None None\n",
+ "2 nan None None\n",
+ "3 Normal 35%% None None\n",
+ "4 Normal Cartilage 0%% None None\n",
+ "5 Normal 25%% None None\n",
+ "6 Soft Tissue No EPI% None None\n",
+ "7 Normal 20%% None None\n",
+ "8 Muscle 0%% None None\n",
+ "9 Normal 50%% None None\n",
+ "10 Normal 40%;Normal 20%;Normal 15%% None None\n",
+ "11 N 35%% None None\n",
+ "12 N 5%% None None\n",
+ "13 Normal EPI 10%;Normal EPI 10%% None None\n",
+ "14 Normal 70$ EPI% None None\n",
+ "15 Normal Thyroid;Normal Thyroid;Normal No EPI% None None\n",
+ "16 Normal 40%% None None\n",
+ "17 Normal 5%% None None\n",
+ "18 Normal 1%% None None\n",
+ "19 Normal 20%;SCC 75%% None None\n",
+ "20 SCC 30%;SCC 20%% None None\n",
+ "21 N 3%% None None\n",
+ "22 N 25%% None None\n",
+ "23 Normal Cartilage% None None\n",
+ "24 Normal 0% EPI% None None\n",
+ "25 N 15%% None None\n",
+ "26 Soft Tissue No EPI;Normal No EPI% None None\n",
+ "27 Normal 20%;Normal No EPI;Normal 5%% None None\n",
+ "28 N 0%% None None\n",
+ "29 N 20%% None None\n",
+ "30 Normal 20% EPI% None None\n",
+ "31 Normal 15%;Normal 5%% None None\n",
+ "32 Normal 30%% None None\n",
+ "33 Normal 10%% None None\n",
+ "34 Hyperplasia 70%% None None\n",
+ "35 N lymphoid B% None None\n",
+ "36 Normal no EPI% None None\n",
+ "37 SCC 75%% None None\n",
+ "38 Normal 10%;Normal 15%;SCC 90%% None None\n",
+ "39 Normal 0%% None None\n",
+ "40 N 10%% None None\n",
+ "41 Normal 15%% None None\n",
+ "42 N 30%% None None\n",
+ "43 SCC 70%;Normal 10%;Normal 30%% None None\n",
+ "44 Normal 10%;Normal 10%;Normal 10%% None None\n",
+ "45 SCC 70%% None None\n",
+ "46 Normal% None None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "neoAntigen_count => ensat_pathologic_n\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " N0 | \n",
+ " 0.54 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " N1 | \n",
+ " 0.54 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5 | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2 | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 3 | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 0 N0 0.54\n",
+ "1 1 N1 0.54\n",
+ "2 5 None NaN\n",
+ "3 4 None NaN\n",
+ "4 2 None NaN\n",
+ "5 3 None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "transcriptomic_subtype => fab_morphology_code\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Classical | \n",
+ " Not Classified | \n",
+ " 0.53 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Mesenchymal | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Atypical | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Basal | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Classical Not Classified 0.53\n",
+ "1 Mesenchymal None NaN\n",
+ "2 Atypical None NaN\n",
+ "3 Basal None NaN"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "mappings = bdi.match_values(\n",
+ " dataset,\n",
+ " column_mapping=column_mappings,\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "\n",
+ "for mapping in mappings:\n",
+ " print(f\"{mapping.attrs['source']} => {mapping.attrs['target']}\")\n",
+ " display(mapping)\n",
+ " print(\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " value_name | \n",
+ " value_description | \n",
+ " column_description | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " G1 | \n",
+ " A morphologic qualifier indicating that a canc... | \n",
+ " The WHO (World Health Organization) grading cl... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " G2 | \n",
+ " A morphologic qualifier indicating that a canc... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " G3 | \n",
+ " A morphologic qualifier indicating that a canc... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " GX | \n",
+ " A morphologic qualifier indicating that the gr... | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Unknown | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Not Reported | \n",
+ " Not provided or available. | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " value_name value_description \\\n",
+ "0 G1 A morphologic qualifier indicating that a canc... \n",
+ "1 G2 A morphologic qualifier indicating that a canc... \n",
+ "2 G3 A morphologic qualifier indicating that a canc... \n",
+ "3 GX A morphologic qualifier indicating that the gr... \n",
+ "4 Unknown \n",
+ "5 Not Reported Not provided or available. \n",
+ "\n",
+ " column_description \n",
+ "0 The WHO (World Health Organization) grading cl... \n",
+ "1 \n",
+ "2 \n",
+ "3 \n",
+ "4 \n",
+ "5 "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.preview_domain(\"gdc\", \"who_nte_grade\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Fixing remaining value mappings\n",
+ "\n",
+ "We need to fix a few value mappings:\n",
+ "- Race\n",
+ "- Ethnicity\n",
+ "- Tumor_Site\n",
+ "\n",
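+ "For `Race`, the automatic matcher produces an incorrect mapping that we need to remove: `nan` -> `american indian or alaska native`. We do this by keeping only the exact matches (similarity of 1.0)."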
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " White | \n",
+ " white | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Asian | \n",
+ " asian | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not Reported | \n",
+ " not reported | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Black or African American | \n",
+ " black or african american | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " nan | \n",
+ " american indian or alaska native | \n",
+ " 0.359 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 White white 1.000\n",
+ "1 Asian asian 1.000\n",
+ "2 Not Reported not reported 1.000\n",
+ "3 Black or African American black or african american 1.000\n",
+ "4 nan american indian or alaska native 0.359"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "race_vmap = bdi.match_values(\n",
+ " dataset,\n",
+ " column_mapping=(\"Race\", \"race\"),\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "race_vmap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " White | \n",
+ " white | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Asian | \n",
+ " asian | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not Reported | \n",
+ " not reported | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Black or African American | \n",
+ " black or african american | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 White white 1.0\n",
+ "1 Asian asian 1.0\n",
+ "2 Not Reported not reported 1.0\n",
+ "3 Black or African American black or african american 1.0"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "race_vmap = race_vmap[race_vmap[\"similarity\"] >= 1.0]\n",
+ "race_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
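+ "For `Ethnicity`, the automatic matcher produces an incorrect mapping that we need to remove: `Not reported` -> `not hispanic or latino`. We do this by keeping only the matches with similarity above 0.9."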
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Hispanic or Latino | \n",
+ " hispanic or latino | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Not-Hispanic or Latino | \n",
+ " not hispanic or latino | \n",
+ " 0.935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not reported | \n",
+ " not hispanic or latino | \n",
+ " 0.268 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " nan | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Hispanic or Latino hispanic or latino 1.000\n",
+ "1 Not-Hispanic or Latino not hispanic or latino 0.935\n",
+ "2 Not reported not hispanic or latino 0.268\n",
+ "3 nan None NaN"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ethinicity_vmap = bdi.match_values(\n",
+ " dataset,\n",
+ " column_mapping=(\"Ethnicity\", \"ethnicity\"),\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "ethinicity_vmap\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Hispanic or Latino | \n",
+ " hispanic or latino | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Not-Hispanic or Latino | \n",
+ " not hispanic or latino | \n",
+ " 0.935 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Hispanic or Latino hispanic or latino 1.000\n",
+ "1 Not-Hispanic or Latino not hispanic or latino 0.935"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ethinicity_vmap = ethinicity_vmap[ethinicity_vmap[\"similarity\"] > 0.9]\n",
+ "ethinicity_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For `Tumor_Site`, given that this dataset is about endometrial cancer, all values must be mapped to \"Endometrium\". So instead of fixing each mapping individually, we will write a custom function that returns \"Endometrium\" regardless of the input value. Later, we will show how to use this function to transform the dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Anterior endometrium | \n",
+ " Endometrium | \n",
+ " 0.852 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Posterior endometrium | \n",
+ " Endometrium | \n",
+ " 0.823 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Other, specify | \n",
+ " Other specified parts of pancreas | \n",
+ " 0.543 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " nan | \n",
+ " Anal canal | \n",
+ " 0.301 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 Anterior endometrium Endometrium 0.852\n",
+ "1 Posterior endometrium Endometrium 0.823\n",
+ "2 Other, specify Other specified parts of pancreas 0.543\n",
+ "3 nan Anal canal 0.301"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_values(\n",
+ " dataset, column_mapping=(\"Tumor_Site\", \"tissue_or_organ_of_origin\"), target=\"gdc\", method=\"tfidf\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Custom mapping function that will be used to map the values of the 'Tumor_Site' column\n",
+ "def map_tumor_site(source_value):\n",
+ " return \"Endometrium\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Combining custom user mappings with suggested mappings\n",
+ "\n",
+ "Before generating a final harmonized dataset, we can combine the automatically generated value mappings with the fixed mappings provided by the user. To do so, we use the `bdi.merge_mappings()` function, which takes a list of mappings (e.g., generated automatically) and a list of \"user-defined mapping overrides\" that will be combined with the first list of mappings and will take precedence whenever they conflict.\n",
+ "\n",
+ "In our example below, all mappings specified in the variable `user_mappings` will override the mappings in `value_mappings` generated by the `bdi.match_values()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from math import ceil\n",
+ "\n",
+ "user_mappings = [\n",
+ " {\n",
+ " # When no mapping is needed, specifying the source and target is enough\n",
+ " \"source\": \"BMI\",\n",
+ " \"target\": \"bmi\",\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"Tumor_Size_cm\",\n",
+ " \"target\": \"tumor_largest_dimension_diameter\",\n",
+ " },\n",
+ " {\n",
+ " # mapper can be a custom Python function\n",
+ " \"source\": \"Tumor_Site\",\n",
+ " \"target\": \"tissue_or_organ_of_origin\",\n",
+ " \"mapper\": map_tumor_site,\n",
+ " },\n",
+ " {\n",
+ " # Lambda functions can also be used as mappers\n",
+ " \"source\": \"Age\",\n",
+ " \"target\": \"days_to_birth\",\n",
+ " \"mapper\": lambda age: -age * 365.25,\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"Age\",\n",
+ " \"target\": \"age_at_diagnosis\",\n",
+ " \"mapper\": lambda age: float(\"nan\") if pd.isnull(age) else ceil(age*365.25),\n",
+ " },\n",
+ " {\n",
+ " # We can also use a data frame to specify value mappings using the `matches` attribute\n",
+ " \"source\": \"Histologic_type\",\n",
+ " \"target\": \"primary_diagnosis\",\n",
+ " \"matches\": hist_type_vmap\n",
+ " },\n",
+ " # For dataframes that contain the 'source' and 'target' columns as attributes,\n",
+ " # such as the ones returned by the match_values() function, we can directly\n",
+ " # use them as mappings\n",
+ " ethinicity_vmap,\n",
+ " race_vmap,\n",
+ "]\n",
+ "\n",
+ "\n",
+ "harmonization_spec = bdi.merge_mappings(value_mappings, user_mappings)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we generate the harmonized dataset, with the user-defined value mappings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " tissue_or_organ_of_origin | \n",
+ " bmi | \n",
+ " days_to_birth | \n",
+ " age_at_diagnosis | \n",
+ " tumor_largest_dimension_diameter | \n",
+ " country_of_birth | \n",
+ " primary_diagnosis | \n",
+ " figo_stage | \n",
+ " race | \n",
+ " ethnicity | \n",
+ " gender | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Endometrium | \n",
+ " 38.88 | \n",
+ " -23376.00 | \n",
+ " 23376.0 | \n",
+ " 2.9 | \n",
+ " United States | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Endometrium | \n",
+ " 39.76 | \n",
+ " -21184.50 | \n",
+ " 21185.0 | \n",
+ " 3.5 | \n",
+ " United States | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Endometrium | \n",
+ " 51.19 | \n",
+ " -18262.50 | \n",
+ " 18263.0 | \n",
+ " 4.5 | \n",
+ " United States | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Endometrium | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma, NOS | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Endometrium | \n",
+ " 32.69 | \n",
+ " -27393.75 | \n",
+ " 27394.0 | \n",
+ " 3.5 | \n",
+ " United States | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " Stage IA | \n",
+ " white | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Endometrium | \n",
+ " 29.40 | \n",
+ " -27393.75 | \n",
+ " 27394.0 | \n",
+ " 4.2 | \n",
+ " Ukraine | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " Stage IA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Endometrium | \n",
+ " 35.42 | \n",
+ " -27028.50 | \n",
+ " 27029.0 | \n",
+ " 1.5 | \n",
+ " Ukraine | \n",
+ " Endometrioid adenoma, NOS | \n",
+ " Stage III | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " Endometrium | \n",
+ " 24.32 | \n",
+ " -31046.25 | \n",
+ " 31047.0 | \n",
+ " 3.8 | \n",
+ " United States | \n",
+ " Serous carcinoma, NOS | \n",
+ " Stage III | \n",
+ " black or african american | \n",
+ " not hispanic or latino | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Endometrium | \n",
+ " 34.06 | \n",
+ " -25567.50 | \n",
+ " 25568.0 | \n",
+ " 5.0 | \n",
+ " Ukraine | \n",
+ " Serous carcinoma, NOS | \n",
+ " Stage IA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " Endometrium | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " Serous carcinoma, NOS | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " tissue_or_organ_of_origin bmi days_to_birth age_at_diagnosis \\\n",
+ "0 Endometrium 38.88 -23376.00 23376.0 \n",
+ "1 Endometrium 39.76 -21184.50 21185.0 \n",
+ "2 Endometrium 51.19 -18262.50 18263.0 \n",
+ "3 Endometrium NaN NaN NaN \n",
+ "4 Endometrium 32.69 -27393.75 27394.0 \n",
+ ".. ... ... ... ... \n",
+ "99 Endometrium 29.40 -27393.75 27394.0 \n",
+ "100 Endometrium 35.42 -27028.50 27029.0 \n",
+ "101 Endometrium 24.32 -31046.25 31047.0 \n",
+ "102 Endometrium 34.06 -25567.50 25568.0 \n",
+ "103 Endometrium NaN NaN NaN \n",
+ "\n",
+ " tumor_largest_dimension_diameter country_of_birth \\\n",
+ "0 2.9 United States \n",
+ "1 3.5 United States \n",
+ "2 4.5 United States \n",
+ "3 NaN NaN \n",
+ "4 3.5 United States \n",
+ ".. ... ... \n",
+ "99 4.2 Ukraine \n",
+ "100 1.5 Ukraine \n",
+ "101 3.8 United States \n",
+ "102 5.0 Ukraine \n",
+ "103 NaN Ukraine \n",
+ "\n",
+ " primary_diagnosis figo_stage race \\\n",
+ "0 Endometrioid adenoma, NOS Stage IA white \n",
+ "1 Endometrioid adenoma, NOS Stage IA white \n",
+ "2 Endometrioid adenoma, NOS Stage IA white \n",
+ "3 Carcinosarcoma, NOS NaN NaN \n",
+ "4 Endometrioid adenoma, NOS Stage IA white \n",
+ ".. ... ... ... \n",
+ "99 Endometrioid adenoma, NOS Stage IA NaN \n",
+ "100 Endometrioid adenoma, NOS Stage III NaN \n",
+ "101 Serous carcinoma, NOS Stage III black or african american \n",
+ "102 Serous carcinoma, NOS Stage IA NaN \n",
+ "103 Serous carcinoma, NOS NaN NaN \n",
+ "\n",
+ " ethnicity gender tumor_focality \n",
+ "0 not hispanic or latino female Unifocal \n",
+ "1 not hispanic or latino female Unifocal \n",
+ "2 not hispanic or latino female Unifocal \n",
+ "3 NaN NaN NaN \n",
+ "4 not hispanic or latino female Unifocal \n",
+ ".. ... ... ... \n",
+ "99 NaN female Unifocal \n",
+ "100 NaN female Unifocal \n",
+ "101 not hispanic or latino female Unifocal \n",
+ "102 NaN female Unifocal \n",
+ "103 NaN NaN NaN \n",
+ "\n",
+ "[104 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "harmonized_dataset = bdi.materialize_mapping(dataset, harmonization_spec)\n",
+ "harmonized_dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For comparison, here is what our original data looked like:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Tumor_Site | \n",
+ " BMI | \n",
+ " Age | \n",
+ " Age | \n",
+ " Tumor_Size_cm | \n",
+ " Country | \n",
+ " Histologic_type | \n",
+ " FIGO_stage | \n",
+ " Race | \n",
+ " Ethnicity | \n",
+ " Gender | \n",
+ " Tumor_Focality | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Anterior endometrium | \n",
+ " 38.88 | \n",
+ " 64.0 | \n",
+ " 64.0 | \n",
+ " 2.9 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Posterior endometrium | \n",
+ " 39.76 | \n",
+ " 58.0 | \n",
+ " 58.0 | \n",
+ " 3.5 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Other, specify | \n",
+ " 51.19 | \n",
+ " 50.0 | \n",
+ " 50.0 | \n",
+ " 4.5 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Other, specify | \n",
+ " 32.69 | \n",
+ " 75.0 | \n",
+ " 75.0 | \n",
+ " 3.5 | \n",
+ " United States | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Other, specify | \n",
+ " 29.40 | \n",
+ " 75.0 | \n",
+ " 75.0 | \n",
+ " 4.2 | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " IA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Other, specify | \n",
+ " 35.42 | \n",
+ " 74.0 | \n",
+ " 74.0 | \n",
+ " 1.5 | \n",
+ " Ukraine | \n",
+ " Endometrioid | \n",
+ " II | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " Other, specify | \n",
+ " 24.32 | \n",
+ " 85.0 | \n",
+ " 85.0 | \n",
+ " 3.8 | \n",
+ " United States | \n",
+ " Serous | \n",
+ " II | \n",
+ " Black or African American | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Other, specify | \n",
+ " 34.06 | \n",
+ " 70.0 | \n",
+ " 70.0 | \n",
+ " 5.0 | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " IA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Female | \n",
+ " Unifocal | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Ukraine | \n",
+ " Serous | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
104 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Tumor_Site BMI Age Age Tumor_Size_cm Country \\\n",
+ "0 Anterior endometrium 38.88 64.0 64.0 2.9 United States \n",
+ "1 Posterior endometrium 39.76 58.0 58.0 3.5 United States \n",
+ "2 Other, specify 51.19 50.0 50.0 4.5 United States \n",
+ "3 NaN NaN NaN NaN NaN NaN \n",
+ "4 Other, specify 32.69 75.0 75.0 3.5 United States \n",
+ ".. ... ... ... ... ... ... \n",
+ "99 Other, specify 29.40 75.0 75.0 4.2 Ukraine \n",
+ "100 Other, specify 35.42 74.0 74.0 1.5 Ukraine \n",
+ "101 Other, specify 24.32 85.0 85.0 3.8 United States \n",
+ "102 Other, specify 34.06 70.0 70.0 5.0 Ukraine \n",
+ "103 NaN NaN NaN NaN NaN Ukraine \n",
+ "\n",
+ " Histologic_type FIGO_stage Race \\\n",
+ "0 Endometrioid IA White \n",
+ "1 Endometrioid IA White \n",
+ "2 Endometrioid IA White \n",
+ "3 Carcinosarcoma NaN NaN \n",
+ "4 Endometrioid IA White \n",
+ ".. ... ... ... \n",
+ "99 Endometrioid IA NaN \n",
+ "100 Endometrioid II NaN \n",
+ "101 Serous II Black or African American \n",
+ "102 Serous IA NaN \n",
+ "103 Serous NaN NaN \n",
+ "\n",
+ " Ethnicity Gender Tumor_Focality \n",
+ "0 Not-Hispanic or Latino Female Unifocal \n",
+ "1 Not-Hispanic or Latino Female Unifocal \n",
+ "2 Not-Hispanic or Latino Female Unifocal \n",
+ "3 NaN NaN NaN \n",
+ "4 Not-Hispanic or Latino Female Unifocal \n",
+ ".. ... ... ... \n",
+ "99 NaN Female Unifocal \n",
+ "100 NaN Female Unifocal \n",
+ "101 Not-Hispanic or Latino Female Unifocal \n",
+ "102 NaN Female Unifocal \n",
+ "103 NaN NaN NaN \n",
+ "\n",
+ "[104 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "original_columns = map(lambda m: m[\"source\"], harmonization_spec)\n",
+ "dataset[original_columns]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/getting-started-jf.ipynb b/examples/getting-started-jf.ipynb
new file mode 100644
index 00000000..d245e9b9
--- /dev/null
+++ b/examples/getting-started-jf.ipynb
@@ -0,0 +1,3547 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Getting Started"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, import the `bdikit` library."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import bdikit as bdi\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this example, we are mapping data from Dou et al. (https://pubmed.ncbi.nlm.nih.gov/37567170/) to the GDC format."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " Histologic_Grade_FIGO | \n",
+ " Histologic_type | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " Clin_Stage_Dist_Mets-cM | \n",
+ " Path_Stage_Dist_Mets-pM | \n",
+ " tumor_Stage-Pathological | \n",
+ " FIGO_stage | \n",
+ " BMI | \n",
+ " Age | \n",
+ " Race | \n",
+ " Ethnicity | \n",
+ " Gender | \n",
+ " Tumor_Site | \n",
+ " Tumor_Focality | \n",
+ " Tumor_Size_cm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 38.88 | \n",
+ " 64.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Anterior endometrium | \n",
+ " Unifocal | \n",
+ " 2.9 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage IV | \n",
+ " IA | \n",
+ " 39.76 | \n",
+ " 58.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Posterior endometrium | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " United States | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 51.19 | \n",
+ " 50.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Carcinosarcoma | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " United States | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " No pathologic evidence of distant metastasis | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 32.69 | \n",
+ " 75.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 3.5 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " United States | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 20.28 | \n",
+ " 63.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 55.67 | \n",
+ " 50.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Other_specify | \n",
+ " FIGO grade 2 | \n",
+ " Endometrioid | \n",
+ " pT1a (FIGO IA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 25.68 | \n",
+ " 60.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " United States | \n",
+ " NaN | \n",
+ " Serous | \n",
+ " pT3a (FIGO IIIA) | \n",
+ " pNX | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage III | \n",
+ " IIIA | \n",
+ " 21.57 | \n",
+ " 83.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " United States | \n",
+ " FIGO grade 1 | \n",
+ " Endometrioid | \n",
+ " pT1 (FIGO I) | \n",
+ " pN0 | \n",
+ " cM0 | \n",
+ " Staging Incomplete | \n",
+ " Stage I | \n",
+ " IA | \n",
+ " 34.26 | \n",
+ " 69.0 | \n",
+ " White | \n",
+ " Not-Hispanic or Latino | \n",
+ " Female | \n",
+ " Other, specify | \n",
+ " Unifocal | \n",
+ " 5.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Country Histologic_Grade_FIGO Histologic_type \\\n",
+ "0 United States FIGO grade 1 Endometrioid \n",
+ "1 United States FIGO grade 1 Endometrioid \n",
+ "2 United States FIGO grade 2 Endometrioid \n",
+ "3 NaN NaN Carcinosarcoma \n",
+ "4 United States FIGO grade 2 Endometrioid \n",
+ "5 United States NaN Serous \n",
+ "6 United States FIGO grade 1 Endometrioid \n",
+ "7 Other_specify FIGO grade 2 Endometrioid \n",
+ "8 United States NaN Serous \n",
+ "9 United States FIGO grade 1 Endometrioid \n",
+ "\n",
+ " Path_Stage_Primary_Tumor-pT Path_Stage_Reg_Lymph_Nodes-pN \\\n",
+ "0 pT1a (FIGO IA) pN0 \n",
+ "1 pT1a (FIGO IA) pNX \n",
+ "2 pT1a (FIGO IA) pN0 \n",
+ "3 NaN NaN \n",
+ "4 pT1a (FIGO IA) pNX \n",
+ "5 pT1a (FIGO IA) pNX \n",
+ "6 pT1a (FIGO IA) pNX \n",
+ "7 pT1a (FIGO IA) pNX \n",
+ "8 pT3a (FIGO IIIA) pNX \n",
+ "9 pT1 (FIGO I) pN0 \n",
+ "\n",
+ " Clin_Stage_Dist_Mets-cM Path_Stage_Dist_Mets-pM \\\n",
+ "0 cM0 Staging Incomplete \n",
+ "1 cM0 Staging Incomplete \n",
+ "2 cM0 Staging Incomplete \n",
+ "3 NaN NaN \n",
+ "4 cM0 No pathologic evidence of distant metastasis \n",
+ "5 cM0 Staging Incomplete \n",
+ "6 cM0 Staging Incomplete \n",
+ "7 cM0 Staging Incomplete \n",
+ "8 cM0 Staging Incomplete \n",
+ "9 cM0 Staging Incomplete \n",
+ "\n",
+ " tumor_Stage-Pathological FIGO_stage BMI Age Race \\\n",
+ "0 Stage I IA 38.88 64.0 White \n",
+ "1 Stage IV IA 39.76 58.0 White \n",
+ "2 Stage I IA 51.19 50.0 White \n",
+ "3 NaN NaN NaN NaN NaN \n",
+ "4 Stage I IA 32.69 75.0 White \n",
+ "5 Stage I IA 20.28 63.0 White \n",
+ "6 Stage I IA 55.67 50.0 White \n",
+ "7 Stage I IA 25.68 60.0 White \n",
+ "8 Stage III IIIA 21.57 83.0 White \n",
+ "9 Stage I IA 34.26 69.0 White \n",
+ "\n",
+ " Ethnicity Gender Tumor_Site Tumor_Focality \\\n",
+ "0 Not-Hispanic or Latino Female Anterior endometrium Unifocal \n",
+ "1 Not-Hispanic or Latino Female Posterior endometrium Unifocal \n",
+ "2 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "5 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "6 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "7 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "8 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "9 Not-Hispanic or Latino Female Other, specify Unifocal \n",
+ "\n",
+ " Tumor_Size_cm \n",
+ "0 2.9 \n",
+ "1 3.5 \n",
+ "2 4.5 \n",
+ "3 NaN \n",
+ "4 3.5 \n",
+ "5 6.0 \n",
+ "6 4.5 \n",
+ "7 5.0 \n",
+ "8 4.0 \n",
+ "9 5.2 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset = pd.read_csv(\"./datasets/dou.csv\")\n",
+ "\n",
+ "columns = [\n",
+ " \"Country\",\n",
+ " \"Histologic_type\",\n",
+ " \"FIGO_stage\",\n",
+ " \"BMI\",\n",
+ " \"Age\",\n",
+ " \"Race\",\n",
+ " \"Ethnicity\",\n",
+ " \"Gender\",\n",
+ " \"Tumor_Focality\",\n",
+ " \"Tumor_Size_cm\",\n",
+ "]\n",
+ "\n",
+ "dataset.head(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Matching the table schema to GDC standard vocabulary\n",
+ "\n",
+ "`bdi-kit` offers a suite of functions to help with data harmonization tasks.\n",
+ "For instance, it can help with automatic discovery of one-to-one mappings between the columns in the input (source) dataset and a target dataset schema. The target schema can be either another table or a standard data vocabulary such as the GDC (Genomic Data Commons).\n",
+ "\n",
+ "To achieve this using `bdi-kit`, we can use the `match_schema()` function to match columns to the GDC vocabulary schema as follows.\n",
+ "\n",
+ "**TODO:** State how many attributes the GDC schema contains, explain why current state-of-the-art (SOTA) schema matchers are not able to handle it and why this model is needed, and point to a description of the model.\n",
+ "\n",
+ "Note: This step requires the model to be downloaded, which may take a few minutes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting features from 10 columns...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3031a658aaca478184486c9c8cf567f7",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/10 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features loaded for 734 columns\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Country | \n",
+ " country_of_birth | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Histologic_type | \n",
+ " history_of_tumor_type | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " FIGO_stage | \n",
+ " figo_stage | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BMI | \n",
+ " average_base_quality | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Age | \n",
+ " age_at_diagnosis | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Race | \n",
+ " race | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Ethnicity | \n",
+ " ethnicity | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Gender | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Tumor_Focality | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Tumor_Size_cm | \n",
+ " tumor_width_measurement | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 Country country_of_birth\n",
+ "1 Histologic_type history_of_tumor_type\n",
+ "2 FIGO_stage figo_stage\n",
+ "3 BMI average_base_quality\n",
+ "4 Age age_at_diagnosis\n",
+ "5 Race race\n",
+ "6 Ethnicity ethnicity\n",
+ "7 Gender gender\n",
+ "8 Tumor_Focality tumor_focality\n",
+ "9 Tumor_Size_cm tumor_width_measurement"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.match_schema(dataset[columns], target=\"gdc\", method=\"ct_learning\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "100%|██████████| 17/17 [00:00<00:00, 25.70it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 17 columns\n",
+ "Table features loaded for 734 columns\n",
+ "Distances (cosine): [1.10394392 0.98914557 0.92537964 0.94752893 1.05914744 1.08939152\n",
+ " 1.04157806 1.04617294 0.97803455 1.02611567 0.92811078 0.98646065\n",
+ " 0.92494871 0.82554179 1.08864448 1.11371916 0.9982571 0.88276508\n",
+ " 1.03046031 1.14162502 0.98299454 0.89779826 0.9763337 1.07523165\n",
+ " 0.89635589 1.06016441 1.0512307 1.02413479 0.91700193 1.08814005\n",
+ " 0.98184121 1.12561218 1.1200962 1.01722983 1.0703122 1.12627641\n",
+ " 1.07342407 1.05760935 1.1052211 1.20270863 1.05279379 1.08772562\n",
+ " 1.07630705 1.03543143 1.10607188 0.86132086 0.92440717 1.01018515\n",
+ " 0.93720262 1.00066075 0.9507775 0.8202722 0.93760846 1.00147017\n",
+ " 1.09045534 1.07474188 0.97043665 1.06990152 1.08130116 1.03859922\n",
+ " 1.09030101 0.94105161 1.01664633 1.01230161 0.99208111 0.83868857\n",
+ " 1.03597787 0.91045015 1.04946121 1.06219909 0.99042535 1.17921298\n",
+ " 1.06863905 1.11142212 1.1048234 1.04655936 1.07739838 1.02920307\n",
+ " 0.99854448 1.12527391 1.06252065 0.97119052 1.10264705 1.15662817\n",
+ " 0.93040146 0.92724415 0.80938487 1.10791158 0.97412993 1.11506726\n",
+ " 1.0649948 0.98802979 0.92422746 0.88855234 1.0779642 0.74885521\n",
+ " 0.91476221 1.05398798 0.93567329 1.10098667 1.14394185 1.10934022\n",
+ " 1.0617265 0.95444284 1.10284081 1.03876524 0.92237638 0.88370692\n",
+ " 0.94089827 1.13945938 0.99603632 0.93801217 1.00976941 0.88026456\n",
+ " 0.98282522 1.05058358 0.9860883 1.05932552 0.96329221 1.11555802\n",
+ " 1.06438735 1.00755368 1.0020513 1.01255041 0.77703186 0.95950253\n",
+ " 1.02155211 1.07603479 1.04840982 1.01180617 0.98880235 1.01487407\n",
+ " 0.98343204 1.02755002 1.08020208 0.48299481 0.59773088 0.96341324\n",
+ " 1.06461732 0.95000656 0.87175934 1.03878844 1.08659591 1.0836373\n",
+ " 1.08483535 1.10845733 1.09937026 0.99903025 1.11586622 0.94408797\n",
+ " 1.0390607 0.93682634 0.90696471 1.03023081 0.85651648 0.96370617\n",
+ " 0.94631951 0.96832422 1.09166494 0.98129671 1.09322019 0.96358111\n",
+ " 1.07313259 0.98367198 0.98307862 0.95333907 1.06234025 1.01072523\n",
+ " 1.01175053 0.92528214 0.98420634 1.20171161 1.13879293 1.0812003\n",
+ " 0.92027825 1.18482933 1.16669788 1.18738423 1.08630438 0.95013934\n",
+ " 0.843392 0.92903281 0.93205611 1.05871603 0.92614523 1.0222684\n",
+ " 1.10576144 1.10696842 1.04018278 1.14572734 1.0777618 0.98755742\n",
+ " 1.03651642 0.84308325 0.85711754 0.93358796 1.01399868 0.99781095\n",
+ " 0.84868517 0.8115899 0.85617877 0.92992486 1.0097102 0.91364532\n",
+ " 1.03334671 1.09466286 0.98169192 1.20903555 1.08876504 1.03504838\n",
+ " 1.0169913 0.73978803 0.93835376 1.01465904 1.01795997 0.98929862\n",
+ " 1.00141661 1.18443834 1.12905452 1.19624601 1.22307307 0.96228825\n",
+ " 1.11988601 0.95709925 0.82248892 0.99036264 1.15884998 1.23112248\n",
+ " 0.9625345 0.8891403 0.96666751 0.98046031 1.08053804 0.97642373\n",
+ " 1.01776627 1.09889763 1.18154788 1.16717484 1.14716145 1.15079721\n",
+ " 1.11800581 0.99361518 0.87273435 1.15907701 1.12392701 0.85336636\n",
+ " 1.24135173 1.07302095 0.85393121 0.99982827 0.97755079 1.06181857\n",
+ " 0.95620558 1.08254723 1.08192154 1.09986565 0.98880487 1.07818384\n",
+ " 0.90974765 1.04102068 0.80043169 1.09045143 1.04032956 1.03911996\n",
+ " 1.06043302 0.99891267 1.01993761 1.06014306 0.97428953 1.04603332\n",
+ " 1.06379186 1.14001071 1.07279578 1.08315104 0.99714045 1.05195085\n",
+ " 0.95903429 1.05745751 1.0901864 0.91190991 1.07615513 1.18597678\n",
+ " 1.03708796 0.93607662 1.08018328 0.90886744 1.02632555 1.11539912\n",
+ " 0.93305226 1.09629403 1.12550839 0.80241756 1.00335694 1.05990332\n",
+ " 0.98862793 1.06124536 0.84956406 1.05028204 1.03342671 1.11216918\n",
+ " 1.18161967 0.91032244 0.92101728 1.03122995 0.90456281 1.08494827\n",
+ " 0.86583913 0.98891987 1.00621366 0.9859491 1.00789291 0.99695072\n",
+ " 1.05242665 0.91534743 1.04813606 1.04154024 0.98333674 0.96281704\n",
+ " 0.67681641 1.04877961 0.92801401 0.81800031 0.78160584 1.11327994\n",
+ " 0.93739196 1.03289162 0.950401 0.90061114 0.97318549 0.99941559\n",
+ " 1.17323439 1.09968938 0.98071646 1.08068658 0.89749954 1.0458911\n",
+ " 1.12768754 0.87086419 1.18142233 1.01785715 1.04352135 1.05539603\n",
+ " 1.03877955 0.97201685 1.00169682 0.93782883 1.03205261 0.91170141\n",
+ " 0.96709506 0.90107587 0.98676919 0.90332241 1.05720276 0.98237062\n",
+ " 0.99980417 0.8101733 1.09432457 0.83165042 0.916839 1.10037032\n",
+ " 0.98565848 1.05722715 0.99255827 1.07289264 1.05466982 0.94580267\n",
+ " 1.00602562 0.89057042 0.88157763 0.94248613 0.87316393 1.05024452\n",
+ " 1.06235684 1.05657489 0.97377679 1.02328566 1.1440696 0.95296827\n",
+ " 0.95592317 0.95327472 1.22615593 1.09490717 0.96420086 0.94010969\n",
+ " 1.0026252 1.03672205 1.08922413 1.07104104 1.09634504 1.03859818\n",
+ " 1.06104437 1.04128541 0.99448946 0.98926661 1.04007511 1.06703309\n",
+ " 0.9727568 1.09338744 1.01456652 1.01050629 0.87937028 0.98677152\n",
+ " 0.9213385 0.91740966 0.88960243 0.63950099 0.79796862 1.08258118\n",
+ " 0.97580701 1.08796435 1.07144308 1.0008701 0.98971714 1.0170135\n",
+ " 0.82637575 0.94891125 0.71287148 1.09095092 0.87158585 0.73344138\n",
+ " 0.74428246 0.8422214 0.94960729 0.97114587 1.0489234 0.92288686\n",
+ " 1.01545398 1.03625752 0.85898154 0.98714234 0.94018373 1.12481494\n",
+ " 1.00119273 1.11810425 1.01326632 0.92469032 0.92980056 0.92727668\n",
+ " 0.9768303 0.87982122 0.91996929 1.11597287 1.1182633 1.05027882\n",
+ " 1.0392678 1.05293765 1.06230108 1.07126471 1.03459204 1.06692802\n",
+ " 1.06680581 1.07578711 1.09832561 1.06785771 1.09110073 1.072232\n",
+ " 1.11229952 0.73257871 0.97300813 1.01270747 0.97923095 0.9118936\n",
+ " 1.09798411 0.94435306 1.01845477 0.99266677 1.01822988 1.09277295\n",
+ " 1.00182525 1.05582098 0.94657405 0.86855501 0.80821068 0.78833387\n",
+ " 1.08846918 0.9472507 0.99033926 0.94183086 0.99213522 0.88507271\n",
+ " 0.84866263 1.04572674 1.02420407 0.92213033 1.0096699 0.83423474\n",
+ " 0.96515487 0.95035134 1.09420516 0.79956608 1.1221821 1.09863266\n",
+ " 1.13301877 1.13794924 1.07165549 1.0894248 1.04193192 0.99004886\n",
+ " 1.02817055 0.92258989 1.09123675 0.97055931 0.59536741 0.90337304\n",
+ " 0.83098943 1.07640774 1.09815195 1.02314465 0.99043423 1.0401784\n",
+ " 1.02778398 1.02133882 0.93815909 0.99128063 1.01307365 1.13949215\n",
+ " 1.23090947 1.10641274 1.07136228 0.92427334 0.83144663 1.08548376\n",
+ " 0.95114038 0.83579505 0.86071993 0.81958304 0.9292794 0.82915263\n",
+ " 1.000207 0.85948604 0.74625659 0.77283614 1.02212775 1.09282251\n",
+ " 1.03878331 0.92174042 1.02441132 1.08259223 0.92233169 1.05281913\n",
+ " 1.02193065 1.08589466 1.0385232 1.07859742 1.12202981 1.23649353\n",
+ " 0.97529974 1.07293888 0.89579682 1.12471282 1.05079676 1.06479434\n",
+ " 1.10388569 1.05454199 1.08868455 1.01155548 0.75796333 1.05108877\n",
+ " 1.01184285 0.9221099 1.04615021 1.23777058 1.01306592 0.87447406\n",
+ " 1.0056552 1.02213813 0.9628696 0.95176954 0.92195868 0.92589372\n",
+ " 0.94173811 0.95685675 0.936856 0.91443909 1.01259761 1.13564631\n",
+ " 1.08363326 1.13797314 0.93452242 1.04259107 0.9028165 1.03835095\n",
+ " 0.98988589 1.1532492 1.11142004 1.07981601 1.05956999 0.99421628\n",
+ " 1.09775178 0.98814517 0.97471704 1.17084441 1.00754041 1.06322675\n",
+ " 0.97705918 1.14216574 0.98732065 1.16913286 1.02460987 1.01453294\n",
+ " 0.85051374 0.90164168 0.75500349 0.93769278 0.99794825 0.67571398\n",
+ " 1.0697018 1.05094659 1.09084101 0.98633418 1.05337988 1.06408551\n",
+ " 1.09490506 0.91797621 0.92605609 1.13372687 1.00254043 1.04656997\n",
+ " 1.05881267 1.01958265 1.02282209 1.06794705 1.08145399 0.88131033\n",
+ " 1.18920747 1.02847709 0.98874637 1.15855894 1.11623241 1.11365645\n",
+ " 1.20280133 0.94716725 0.92448238 0.94697905 1.14526747 1.09326496\n",
+ " 1.02560705 0.93691396 1.05218455 1.01606935 1.01837208 1.01962053\n",
+ " 1.06717391 1.10738692 1.09184853 1.05044281 1.04978917 0.98921946\n",
+ " 1.07673994 1.10179733 1.13793322 1.20149239 1.16039546 1.19746403\n",
+ " 1.20251259 1.06963263 1.03985452 1.22618904 1.08471812 1.11429027\n",
+ " 1.02439602 1.18757639 1.15529956 1.18928422 1.08928541 1.13921254\n",
+ " 1.07755546 0.83842614 1.02978928 0.99067156 1.00432117 0.87413315\n",
+ " 0.9867311 1.01350094 0.92706312 1.08906537 1.0535204 1.08342161\n",
+ " 1.07943138 0.87403037 1.00562954 1.06666096 0.98922332 1.01362779\n",
+ " 1.08242069 1.04911772 0.97198061 0.90712231 1.01643754 0.98983685\n",
+ " 0.90092135 1.00485868 1.08817852 0.97958877 0.86093198 1.01270715\n",
+ " 1.06322836 0.87723456 0.75863909 0.85765349 0.86777915 0.91646267\n",
+ " 1.02064552 0.85726629 0.92339631 1.09390577 1.06696461 0.87206861\n",
+ " 0.84478102 0.79036395 0.94467204 1.13688743 0.79332937 0.9472456\n",
+ " 1.09285191 1.03885768 0.96989904 0.94336486 1.05973678 0.94492703\n",
+ " 0.93686459 1.02778933 1.10049902 1.06883023 1.07990608 1.09724396\n",
+ " 0.94154839 0.79522577 0.8649054 0.80816119 0.97008679 0.93918958\n",
+ " 0.6002156 0.98324281]\n",
+ "Distances (cosine): [0.79377923 0.84511488 1.04353481 1.02314329 0.7784911 0.81654495\n",
+ " 1.03197819 0.90592426 0.35509702 0.94065102 0.89271501 0.98496283\n",
+ " 0.9547756 1.08805272 0.897365 1.04788803 0.8024904 1.08267315\n",
+ " 0.73421394 0.90004238 0.72860143 0.93286173 0.65891005 0.78264038\n",
+ " 0.94245686 0.979172 0.95123953 0.95499859 1.12591085 0.90164634\n",
+ " 0.91346381 0.6735477 0.92113651 1.09470148 0.81614275 1.04215476\n",
+ " 1.02110069 0.92479474 0.81371484 0.77837078 0.7225767 0.87335875\n",
+ " 0.68908152 0.92316978 0.8921393 0.91176776 0.78991601 0.8811318\n",
+ " 1.04542312 1.00997 0.77236129 1.12375601 0.6917431 0.76603929\n",
+ " 0.86568562 0.88633667 0.82571964 0.76683808 0.86589262 0.88313874\n",
+ " 0.86195698 0.85292808 1.00466475 0.89765768 0.93442691 0.92721858\n",
+ " 0.99146445 0.92929562 0.88701728 0.87045559 0.87244789 0.8859439\n",
+ " 0.7950661 0.84497304 0.97596484 1.03572493 0.92607184 0.77031285\n",
+ " 1.03257968 0.74266591 0.98895215 0.9387461 1.1374819 0.94985102\n",
+ " 0.96559724 1.05106025 0.9960956 0.97224705 1.0142558 1.04205634\n",
+ " 1.14447632 1.07766628 1.11640751 1.05751929 0.84833912 1.12904737\n",
+ " 0.84903635 0.78581259 0.77479666 1.07084306 0.82596477 1.00735514\n",
+ " 1.03113807 0.93347599 1.18816811 0.80498206 0.87964455 1.08737448\n",
+ " 0.80550482 0.8649632 0.90401972 0.96314645 0.92341858 0.89219544\n",
+ " 0.90619392 0.89775644 0.83966164 0.95489495 0.789047 0.9660701\n",
+ " 1.10734387 0.75363676 0.82005423 0.99002856 1.11250083 0.69001789\n",
+ " 0.98490183 0.86574345 1.07533724 1.07308856 1.0469921 1.08064066\n",
+ " 1.06734506 1.07011555 1.06200297 0.99925887 1.06087619 0.90380705\n",
+ " 0.99727887 1.03840539 0.97528677 1.08132852 1.06277997 0.77771114\n",
+ " 0.84807426 0.88447404 0.88371258 0.76206474 0.77746081 1.08907374\n",
+ " 1.07253778 1.07206972 1.0523356 1.13440809 1.24218066 1.09568854\n",
+ " 0.97416499 1.02003222 0.94946839 0.89951724 1.05257643 1.09901662\n",
+ " 0.98286421 0.77847875 0.63512399 0.70523938 1.0832179 1.04548035\n",
+ " 0.99187503 1.08074362 0.89344592 1.0034867 0.9909526 0.81041342\n",
+ " 0.89015763 0.66499386 1.06710314 0.80135555 1.13733216 0.80041616\n",
+ " 0.82599972 1.08522738 0.93429613 0.96775723 0.73520166 0.99964495\n",
+ " 1.00993981 1.1639956 0.88179834 0.96874489 0.66140489 0.64523535\n",
+ " 0.78957577 0.85438113 0.73458138 0.85598959 0.83368027 0.86301537\n",
+ " 0.7660124 0.86197708 0.90880276 0.85669436 0.77694036 0.9057352\n",
+ " 0.59231941 0.68609393 1.06835234 1.02031892 0.67960027 0.67163289\n",
+ " 0.7363616 1.0320741 0.43935722 0.59450233 0.93307786 0.77621832\n",
+ " 0.6670274 0.97377734 0.88159051 0.97416993 0.89331681 1.06633403\n",
+ " 1.00066994 1.05958348 1.06498759 0.88399592 0.93896777 0.97457044\n",
+ " 0.91499811 1.11707712 0.70635897 1.06058492 0.61336059 0.8728544\n",
+ " 0.71294139 1.06531668 0.70917885 0.95035566 0.95033744 1.0365458\n",
+ " 1.06438182 0.73903954 0.84997651 0.95513918 0.92507693 0.92393026\n",
+ " 0.84476342 1.01050567 1.00748422 1.00667892 0.94270071 0.98604309\n",
+ " 0.93855739 0.83329911 0.82646335 0.81842873 0.87438944 0.88726777\n",
+ " 0.97547058 0.84512359 1.10120203 1.02079835 0.71546621 1.01857832\n",
+ " 0.93973101 0.35820669 0.43090605 0.78401955 0.62771236 0.90226637\n",
+ " 0.76915204 1.00559797 0.92189308 0.89765253 0.79622621 1.01323812\n",
+ " 0.39071492 0.66306826 0.71021017 0.897919 0.6684526 1.01278676\n",
+ " 0.88893569 1.1586956 0.82209324 0.94833964 0.98303217 0.67593659\n",
+ " 0.91106865 1.05392563 1.03817585 1.04336623 0.68085444 0.99961924\n",
+ " 0.96949003 1.16263646 0.76239217 0.79888488 1.00826539 0.65290545\n",
+ " 0.89017268 0.8767825 0.80246884 0.84308219 0.88660654 1.02854207\n",
+ " 0.45099801 0.51045045 0.61736395 0.69347606 0.77163913 1.04047237\n",
+ " 0.80225443 0.64174002 0.9183555 0.60762186 0.80518997 0.80724265\n",
+ " 0.9311875 0.88302012 0.81055845 1.02207594 0.998325 0.91300033\n",
+ " 0.93917786 0.75640397 0.97842832 0.887164 1.13279897 1.08077639\n",
+ " 1.07270541 1.1161289 1.02500912 1.10343557 0.95162515 0.98434637\n",
+ " 0.86856111 0.93023513 0.97793608 1.07024577 1.06574188 1.01717099\n",
+ " 0.90693823 1.02026594 0.87813848 1.00040276 1.04674674 1.05922604\n",
+ " 1.09390171 1.19395564 1.03198038 0.95640955 0.77978228 0.87773987\n",
+ " 0.70468028 0.84569914 0.6919281 1.13930737 0.64204632 0.87013306\n",
+ " 0.75911492 0.96781477 0.92518759 0.91522132 0.57432383 0.74084981\n",
+ " 1.07830669 0.93686931 0.91512978 0.84141006 0.6925752 0.90580502\n",
+ " 0.81058632 0.80628637 0.96072791 0.89623029 0.83895134 1.03635416\n",
+ " 1.01291041 0.92958626 1.10036607 0.8433536 0.97620536 0.84480909\n",
+ " 1.00115351 1.02835772 0.84315595 1.04671078 0.94514952 0.87565808\n",
+ " 0.97878669 0.71712046 0.86761036 0.82366785 1.12816417 0.54338535\n",
+ " 1.23133379 0.8443738 1.10201507 1.06371922 1.1610075 0.98761391\n",
+ " 1.03295178 1.01961894 1.12641414 1.02359875 1.11129017 0.89457424\n",
+ " 0.91085043 0.96179961 1.0685417 1.11330743 0.90323326 0.95654209\n",
+ " 1.11409259 0.842797 1.17587809 0.82179262 0.95036271 0.91799227\n",
+ " 1.01248933 1.04471321 0.9903102 1.049056 0.96286264 1.04060533\n",
+ " 0.86361363 0.83808872 0.89448728 1.10685404 0.86857824 0.97841358\n",
+ " 0.87713798 1.01134646 1.02557009 0.98177378 0.89329245 0.87442265\n",
+ " 0.88196559 0.85971519 0.721007 0.87166598 0.92914679 0.99903028\n",
+ " 0.91516036 0.99242205 1.04030249 0.93457009 1.12654171 0.98011889\n",
+ " 1.13392578 1.03740723 0.88850686 1.13402036 1.02018061 0.99807728\n",
+ " 1.12903986 0.95930094 1.03601175 0.99623213 0.98926687 0.95030544\n",
+ " 0.94833886 0.65108075 0.8044386 1.05373274 0.72222036 1.02082896\n",
+ " 0.71968653 0.98134342 1.05812225 1.06579809 1.07216369 1.24263739\n",
+ " 0.87659589 0.9299808 1.00500719 0.86633974 0.42979173 1.02669588\n",
+ " 1.11188179 1.03184952 0.89349942 0.72075745 1.08112645 0.80162359\n",
+ " 1.06419238 0.60704796 1.09044025 1.0206672 1.09525445 0.72892774\n",
+ " 0.92014092 0.93213931 0.93654482 0.86436494 0.98646593 1.03365684\n",
+ " 1.06527873 1.05485911 0.8088211 0.89212748 1.052226 0.93851732\n",
+ " 1.01277612 1.00429412 0.58876666 0.80042127 0.90607665 0.93391198\n",
+ " 0.81000413 0.90904291 0.96353846 0.96339143 1.0178133 0.98240888\n",
+ " 1.10318396 0.74415823 0.74536339 0.95256385 1.09742215 1.08817483\n",
+ " 0.95291813 1.0956764 1.1501048 1.1250106 1.13184386 1.13985464\n",
+ " 1.0318899 1.18980659 1.16978245 1.14108042 0.83408944 0.83920952\n",
+ " 0.76184941 1.15382051 0.96616938 1.05072708 1.18019412 0.99463747\n",
+ " 0.83821661 1.07351592 0.70237043 0.91422827 1.05700333 1.05083962\n",
+ " 0.8355144 0.87578291 0.99749883 0.92657083 0.80012114 0.93679934\n",
+ " 0.77833677 0.91498076 0.91073303 0.9011318 1.07441864 0.72016241\n",
+ " 0.80542364 0.89466006 0.61541166 0.81631268 0.38344518 1.17734014\n",
+ " 1.1182954 0.94961365 0.95259953 0.952703 1.09238144 0.86694939\n",
+ " 0.94411681 1.15486972 1.03022934 1.04562338 0.74619344 0.8862082\n",
+ " 0.9988765 0.81226441 0.92434132 1.07845713 1.03631469 0.86565418\n",
+ " 0.91509932 0.90095683 0.78828725 1.13765517 0.72209985 0.78015085\n",
+ " 0.86745739 0.85392562 0.81134805 0.94901872 0.74771744 0.87664448\n",
+ " 0.9487431 0.94695172 1.04571049 0.77078626 0.7014322 0.88622092\n",
+ " 0.89697477 0.96418283 1.11117632 1.02270486 1.15232261 1.16404042\n",
+ " 0.54437643 0.83872541 1.09410234 0.99114682 1.10898017 0.92804036\n",
+ " 1.03804605 0.87468663 1.17750823 1.09443191 1.01713943 0.84109882\n",
+ " 1.14320478 1.12006731 1.0101061 1.10864952 0.800911 0.78575326\n",
+ " 0.90786821 0.72341924 0.92863417 0.97965122 0.94196835 0.94552929\n",
+ " 0.89450745 1.10964551 1.10833115 1.17161076 0.95751038 0.7275626\n",
+ " 0.76739801 1.01137415 1.02126176 0.76580038 0.90275726 1.07719558\n",
+ " 1.17301883 1.07834549 0.91515952 1.305638 0.96508431 0.80298527\n",
+ " 0.97446924 1.00291281 0.82523247 0.86439797 0.93464455 0.84776342\n",
+ " 1.02528123 1.04426563 0.9677613 1.03695951 0.97564 1.01285923\n",
+ " 0.90810489 1.00402457 1.03319965 1.05486037 0.8442116 1.17975675\n",
+ " 1.08875158 0.97354155 0.72205246 0.67981724 0.6979567 0.87086303\n",
+ " 0.90533805 0.33496469 0.41450096 0.87710891 0.70909963 0.78122453\n",
+ " 0.84276757 0.65895297 0.75780094 0.86242535 0.52307143 0.73491328\n",
+ " 0.67820557 0.79631289 0.98021096 1.00517558 0.94751527 0.76520278\n",
+ " 1.04040576 0.74359536 0.79090394 0.71056795 0.90715007 0.70978451\n",
+ " 0.73903295 0.95632175 0.99005478 1.11526055 1.15154528 1.03789063\n",
+ " 0.96348991 1.02205416 0.96080271 0.97691643 0.97032511 0.74486693\n",
+ " 1.08987067 1.19891628 0.88815843 0.9470519 1.15958828 1.0976111\n",
+ " 1.05219107 0.94376756 0.78063034 0.73139362 0.93125614 0.67141133\n",
+ " 0.34501842 0.72245062 1.01867693 1.084953 1.01753439 0.95348023\n",
+ " 0.8840522 1.06160859 1.08587714 0.94988497 0.8976729 1.13208249\n",
+ " 0.6990244 0.87452904]\n",
+ "Distances (cosine): [1.04534682 0.83449225 1.01881871 1.20422125 0.88390344 0.50633979\n",
+ " 0.80166418 0.93899871 0.89463848 1.1067674 1.14282617 1.24423385\n",
+ " 1.13441173 1.14921422 0.85064511 0.63975606 0.73705031 0.82303183\n",
+ " 0.82085985 0.93002399 0.72586019 0.75952515 0.74355475 0.89328724\n",
+ " 0.92281185 1.08914824 1.06464707 1.01587924 1.00087741 1.10455693\n",
+ " 0.86397886 0.93540472 1.00359993 0.9734764 0.93067889 1.14574301\n",
+ " 1.09539759 0.90983352 1.10573127 1.1043833 1.03928411 0.68565249\n",
+ " 0.61442803 1.04485213 0.65680382 0.89262063 0.76547883 0.60197329\n",
+ " 0.78169802 0.94285067 0.83374858 0.80524751 0.79574854 0.98955976\n",
+ " 0.55378029 0.76972653 1.01082235 0.75126782 1.06095287 1.14661249\n",
+ " 1.17649452 1.16253703 0.7743184 0.93578214 0.97200366 1.07798857\n",
+ " 0.81883198 0.88189776 1.01626758 0.61770969 0.75411874 0.81292612\n",
+ " 0.66018628 1.00594605 1.03370743 1.06801374 0.99700062 1.04980414\n",
+ " 0.62596288 1.00655983 0.69305425 0.83221884 0.60974065 0.68457561\n",
+ " 0.76201676 0.78958613 0.95512487 1.11593361 0.80679338 0.78837296\n",
+ " 0.90380893 0.79525611 0.98962489 0.94698909 0.72460231 0.90784453\n",
+ " 0.72610109 0.7055898 0.81496508 0.65447061 0.66881266 0.79862517\n",
+ " 0.52202087 0.70694762 1.14093738 1.01215033 0.67769648 1.02014617\n",
+ " 0.67189287 0.84493309 1.05786808 0.57816003 1.00712903 0.94309685\n",
+ " 0.79630824 0.60873189 0.70025097 0.43265677 0.80141504 0.93795547\n",
+ " 0.80381175 0.70583708 0.53975243 0.98473081 0.91992756 0.77637376\n",
+ " 0.92485565 0.88907916 0.64588226 0.68398393 0.89283598 0.75041362\n",
+ " 1.03002239 0.71664971 0.90646761 0.91122764 0.94715906 0.93144261\n",
+ " 0.95053633 1.0270605 0.99795198 1.16379636 0.87516108 0.77825713\n",
+ " 0.86977061 0.92890916 0.75890635 0.88593467 0.82083743 1.18285073\n",
+ " 1.25116541 1.05649097 0.9897406 0.87597708 0.9480377 1.16550924\n",
+ " 1.04141481 1.12804056 1.18002195 1.16660758 1.22788299 1.1823521\n",
+ " 1.18655586 0.89444166 1.10514281 1.13772722 1.01222265 1.21479608\n",
+ " 1.00363198 1.12667252 1.16725641 1.24152515 1.29195014 1.02317669\n",
+ " 0.98778808 0.86404917 0.48948512 0.95015464 0.59619479 0.8409988\n",
+ " 0.67208096 0.80926598 0.92303296 0.45661038 1.03248007 0.81232052\n",
+ " 1.13639843 0.92835202 0.85845298 0.89437612 0.69007434 0.88936411\n",
+ " 0.59448023 0.91749758 1.00103566 1.00698363 0.81120153 0.94210757\n",
+ " 1.01495156 1.03067602 1.05664033 1.11954223 0.98658274 0.89921381\n",
+ " 0.88758646 0.9705695 1.09133338 1.0132542 0.97851028 0.84431074\n",
+ " 0.84197033 0.88272569 0.73502187 0.72238603 1.21699013 0.88887705\n",
+ " 1.05416695 0.91501961 0.92663417 0.83927019 0.8549431 1.30954559\n",
+ " 1.26947635 1.27551206 1.08353029 0.82193126 0.81914111 1.07841823\n",
+ " 0.76491672 0.79130334 0.81250934 0.74624751 0.94339441 0.83428629\n",
+ " 0.73914685 0.61665867 1.00500602 1.14556565 1.12166183 1.07128113\n",
+ " 1.04778574 0.87217059 1.03076114 0.80669378 1.00805261 1.10661284\n",
+ " 0.95224318 0.81725271 0.82773444 1.10932531 1.09989331 0.80221199\n",
+ " 1.09616763 1.18055908 1.13904818 1.15372267 1.19273367 0.93040204\n",
+ " 1.21037939 0.83637363 0.99369737 0.95276903 0.6048119 1.01960734\n",
+ " 0.95055756 0.78817089 0.85710299 1.03757536 0.95435961 0.58501312\n",
+ " 1.14008095 1.29330558 0.97783305 1.14624861 1.13438343 1.07621417\n",
+ " 0.70333287 0.61067955 0.52483881 1.01329219 0.33860219 1.20829933\n",
+ " 0.78375426 0.94405198 0.77110729 0.93390717 1.11165004 0.65952427\n",
+ " 0.71068666 0.56275184 0.54643214 1.06807586 0.62405856 0.77378632\n",
+ " 0.96282782 0.76329189 1.10285218 1.13040578 1.1274189 0.84568461\n",
+ " 0.90721833 1.12338988 0.903244 1.01658071 0.87568573 1.31391162\n",
+ " 0.74933227 0.70116898 0.88362748 0.88814001 0.97449582 0.99530486\n",
+ " 1.06269711 1.01613418 0.93361863 0.67527353 0.95088384 1.04925904\n",
+ " 1.12397471 0.78389179 0.95054343 1.23150494 1.01947201 0.63399173\n",
+ " 0.93272953 1.02730567 0.80193835 0.80967406 0.9087958 0.97602037\n",
+ " 1.13166399 0.78308303 1.11553547 1.09524507 0.99322541 1.21201326\n",
+ " 0.76516201 1.00481471 0.89099116 0.86756071 0.93614261 0.97262688\n",
+ " 1.1457529 1.12048512 1.00009068 0.95412648 1.0259919 1.06717773\n",
+ " 0.98429951 1.04934987 1.07239443 1.06048006 1.14849222 1.01428635\n",
+ " 1.14203967 1.10677876 0.74371732 0.94638552 0.89067333 0.8295882\n",
+ " 0.99296511 0.81018981 0.86617648 1.14017717 0.84511235 1.02163549\n",
+ " 0.88385637 0.80771544 1.05851662 0.88042816 0.90649228 1.05013947\n",
+ " 0.8841403 0.91753887 0.93927526 1.14956387 0.93105488 0.97519571\n",
+ " 1.0120718 1.03055226 1.18835806 1.19517603 1.02360029 0.88943948\n",
+ " 1.01156252 1.07922678 0.91323345 0.92382424 0.8995658 0.60822399\n",
+ " 0.7007303 1.12502956 1.14517069 0.79196489 0.76838794 0.74046771\n",
+ " 0.92923849 0.81664571 0.94835659 0.92631228 0.89996565 1.18064574\n",
+ " 1.17790481 1.10844171 1.17163504 0.92763731 1.01930924 0.81923484\n",
+ " 0.70817812 0.96831796 0.82864176 0.95956478 1.06173846 0.79418962\n",
+ " 1.26873413 1.04670846 1.07273889 0.81975422 0.82206944 1.11371286\n",
+ " 1.12708361 1.12332817 0.98001306 1.14902377 0.99258595 0.86893115\n",
+ " 0.55300158 0.85702421 1.06022226 1.00246518 0.54888009 0.51069006\n",
+ " 0.84943509 0.51257126 1.06074035 1.06351955 0.99853674 1.01770872\n",
+ " 0.89748343 1.07005492 1.12751406 1.09755518 1.10279629 0.97398764\n",
+ " 1.14648714 1.08517691 1.0993587 0.98512419 0.99980694 1.17311488\n",
+ " 0.9474967 0.99599463 0.70839356 0.87928033 1.00521035 1.16537576\n",
+ " 1.01851286 1.14014696 0.8994348 0.98425736 0.64800205 1.21153679\n",
+ " 0.73376358 0.68546077 0.85940779 1.10767382 0.60017958 0.97613384\n",
+ " 0.84600848 1.13848258 0.99005333 0.93091833 0.93911247 1.04523212\n",
+ " 1.24269866 0.87507664 0.53619816 0.86124004 0.79781198 0.82471069\n",
+ " 0.86982312 1.1247093 0.90858526 0.80614262 0.94079238 0.83152756\n",
+ " 1.08690995 0.7769924 0.84533395 0.90643842 0.82027779 0.9803175\n",
+ " 1.05138565 1.04845194 1.11024093 1.09649319 1.20142497 0.78523217\n",
+ " 0.83501554 1.09263272 0.63797344 0.72377907 0.96401287 0.97809939\n",
+ " 0.91145021 0.88962243 0.68506623 1.07969542 1.20732737 1.157425\n",
+ " 0.87283946 1.09849403 1.06423024 0.94716247 1.09491586 0.96100161\n",
+ " 1.00484268 1.15389972 0.97082682 1.08822931 0.94049406 0.52066138\n",
+ " 0.81735354 0.92857756 1.17488624 0.79962358 0.69812638 1.11325965\n",
+ " 0.96402694 1.35227801 1.26508196 1.24526343 0.82565041 0.97077339\n",
+ " 1.09024583 0.99405924 1.03222308 0.99686687 0.93302086 1.01724994\n",
+ " 0.99297733 0.88871991 0.86493841 1.16392706 0.98768534 0.9925434\n",
+ " 1.00338824 0.38941011 1.15951719 1.09821636 0.83267225 0.58208267\n",
+ " 0.6284753 0.93208455 0.74056203 0.50512028 0.99414033 0.74490123\n",
+ " 1.04879451 0.90993356 0.63688804 0.86070677 0.79579031 1.00465159\n",
+ " 0.76445638 1.13761298 1.13520614 1.05156847 1.13718849 0.95721637\n",
+ " 1.16612984 1.01416769 0.93725312 0.97166071 1.09056446 0.70421969\n",
+ " 0.84515732 0.50105773 0.85111894 0.91583942 0.99594441 1.02559688\n",
+ " 0.98760969 0.70824655 0.93072976 1.22663842 1.01180287 1.035419\n",
+ " 0.82134197 1.04550548 1.00768757 0.84894967 0.53990856 0.90240712\n",
+ " 1.00219131 0.80319395 0.95815204 0.65537896 1.12173456 1.11943658\n",
+ " 1.11260734 1.08731833 1.03902341 1.07550324 0.66833685 1.26655488\n",
+ " 0.92451854 0.97226992 0.97394181 1.00776892 1.01328708 0.98373986\n",
+ " 0.81837284 0.78109509 0.85626548 0.76689324 1.0184734 1.068447\n",
+ " 1.19118289 1.2117979 0.9383075 1.19423213 0.82954493 1.01808435\n",
+ " 0.99733926 1.0611633 1.28882417 0.71170285 0.98044159 1.06359306\n",
+ " 1.03308093 1.11940472 1.24407655 1.19354694 1.05741523 0.94838573\n",
+ " 0.81631737 0.85772831 0.76025462 0.68924807 1.04575369 1.0928422\n",
+ " 1.06723443 1.06767989 1.21044634 1.09300157 1.05354402 1.00005243\n",
+ " 0.78183794 0.89168471 0.98514474 1.1966679 1.26227033 1.27078105\n",
+ " 1.31079156 1.05884836 1.25154656 1.28569648 0.99333707 1.22597844\n",
+ " 1.12550864 1.35532285 0.98780398 0.96869749 0.99187543 0.83959069\n",
+ " 0.69962162 0.6918884 1.09632142 1.05100187 1.12010049 0.6295884\n",
+ " 0.93327709 0.83275444 0.86167754 0.99813643 1.018953 1.05497157\n",
+ " 1.09611737 0.94565549 0.68851967 1.18237034 0.9440074 0.87380451\n",
+ " 0.99798593 1.1026808 1.11748527 0.90385294 0.80314876 0.76724933\n",
+ " 0.8027015 0.80739205 0.8973715 0.76179977 0.7991677 0.81283183\n",
+ " 0.85274101 0.8957888 0.87920984 0.92612447 1.01476137 0.85340603\n",
+ " 0.84018572 0.9632537 0.83335895 0.96299633 1.22374821 0.76416537\n",
+ " 0.92072461 1.23773021 0.93192888 0.58715839 1.14727755 1.20347987\n",
+ " 1.18039958 0.97774155 1.22645986 0.6827297 1.19051546 0.64655241\n",
+ " 0.75181214 1.0046754 0.99152322 1.05537546 0.97851677 0.92098297\n",
+ " 1.10707632 0.96631477 0.9165961 0.87464713 1.01050989 1.14624962\n",
+ " 0.84930686 0.67767044]\n",
+ "Distances (cosine): [0.82825715 0.62359626 0.99418404 0.77766986 0.52605233 0.97594027\n",
+ " 1.15272241 0.91784397 0.64261686 1.00216108 0.90878798 1.00009768\n",
+ " 0.96607713 1.27259229 0.60202528 1.08765909 0.93217597 0.86445893\n",
+ " 0.49823048 0.40703214 0.87464899 0.72162649 0.4367457 0.33916609\n",
+ " 1.02669423 1.17972145 1.1201921 1.19320868 1.24966553 1.04306109\n",
+ " 1.09800226 0.7227302 0.88590823 1.05822346 0.9403877 0.9666244\n",
+ " 1.0888294 0.83058004 0.85776046 0.89501822 0.88941475 1.00946694\n",
+ " 0.89103029 1.0951134 0.84080234 1.07193999 0.93938561 1.15777243\n",
+ " 1.16803023 1.15079601 0.70980863 1.30431199 0.63826874 0.43676509\n",
+ " 0.90275083 0.98834466 1.10892418 1.07810193 0.93052711 0.94197685\n",
+ " 1.02993145 1.2278289 1.08871582 0.97114848 1.03142096 1.00676926\n",
+ " 0.92681605 0.74661862 0.63355352 1.04526187 0.98797063 0.97088538\n",
+ " 1.06254358 1.01402486 1.02042732 0.97910534 1.00469039 0.89597561\n",
+ " 1.14698009 0.80079943 1.03314213 0.90101546 1.12577638 0.92307883\n",
+ " 1.03040616 1.07563806 1.15012739 0.92063261 0.91749398 1.14584539\n",
+ " 0.9825686 0.90139875 1.082403 1.01582277 1.02625036 1.1349136\n",
+ " 0.82069843 0.98121924 0.69913487 1.16099023 0.86032993 0.7438835\n",
+ " 1.01787291 0.98210699 1.06326245 0.7673374 0.95463315 1.20817819\n",
+ " 0.76018983 1.01412988 0.86505305 1.1523242 1.12402759 0.74792604\n",
+ " 0.67874265 1.0516867 0.69720518 1.15872328 1.11857394 1.02277201\n",
+ " 1.16639151 1.08794582 0.94959075 0.96594236 1.17103131 0.95288232\n",
+ " 0.96886764 0.98076841 1.26025723 1.15271478 1.05524933 1.15845134\n",
+ " 0.97560478 1.21231191 1.00898488 1.16022135 1.2215126 0.77371375\n",
+ " 0.9358835 1.04143417 1.07548799 1.05472926 0.9278626 1.09783274\n",
+ " 0.96140159 1.02268277 0.63609512 1.07861788 1.00091304 1.08764272\n",
+ " 1.11733778 1.07516763 1.16333391 1.23496303 1.22913734 1.06533215\n",
+ " 1.00636774 1.02115576 1.02640577 1.09262709 1.02311979 1.06341619\n",
+ " 0.9671017 0.73401764 0.73118012 0.79022826 1.15174238 0.89872528\n",
+ " 1.15151796 1.04456151 0.95007271 0.8404472 0.70162236 0.71018546\n",
+ " 1.07650111 0.93134183 1.08044276 0.82809769 1.16115854 0.78143183\n",
+ " 0.9814125 1.02492827 0.71366339 1.02863109 0.61822106 1.00530082\n",
+ " 1.01031585 0.99182608 0.96562725 1.01296653 0.9137607 0.87613672\n",
+ " 1.00130004 1.03108626 0.91844041 0.93955793 0.95558833 0.85446313\n",
+ " 1.00057275 1.05435784 0.88478012 0.56609228 0.91957895 0.83366194\n",
+ " 0.47526659 0.33926172 0.98850638 1.05348425 1.01894807 0.96831791\n",
+ " 1.00614492 1.09645548 0.77776391 0.97286892 1.11355171 0.81814163\n",
+ " 0.86631941 0.94860733 0.84877635 0.92157772 0.84346681 1.0462804\n",
+ " 0.97938001 0.90605716 1.16448537 1.05686045 1.03184418 0.97171237\n",
+ " 0.94274689 1.09993827 0.94048533 1.08097811 0.41002363 1.10082138\n",
+ " 0.99476886 1.09940623 0.60839587 1.08686121 1.09582832 1.00061523\n",
+ " 0.94066168 0.46424254 1.03569256 0.80625447 0.93183815 1.1316273\n",
+ " 0.9218113 1.1285146 1.06333175 0.86953332 0.88405937 0.93055432\n",
+ " 0.76205487 0.88429242 0.90281743 0.86966408 0.84140403 1.21853639\n",
+ " 0.89812965 1.13285731 1.28525964 0.68303351 0.82489298 1.01671146\n",
+ " 1.03504462 0.8139996 0.7193002 0.97953702 0.97664268 1.22034105\n",
+ " 0.72608399 0.94264687 1.09306173 0.91910223 0.86787975 0.96599085\n",
+ " 0.72039849 0.9624797 0.88905622 0.72705659 0.86267864 1.00789605\n",
+ " 1.10051618 1.33434683 0.96097071 0.87129107 0.85598058 1.07526694\n",
+ " 1.20328568 1.069503 1.05567501 0.91168245 0.86862345 1.13475984\n",
+ " 1.0971439 1.12814873 1.00420693 0.94750218 1.05140952 0.80333984\n",
+ " 0.80500859 1.19949303 0.83217579 0.93004902 0.96807189 0.95374445\n",
+ " 0.67138639 0.78518247 0.90052132 0.52288245 0.71866269 1.14238894\n",
+ " 0.79645181 0.71069047 0.77103636 0.79982669 0.67306628 0.77688846\n",
+ " 1.05550589 0.87513748 0.74746807 0.92117694 1.05094447 0.99906812\n",
+ " 0.84833297 0.78409731 0.93032099 0.95759547 1.06283122 1.05323048\n",
+ " 0.77159876 1.0908598 0.75632751 0.83182145 0.95962812 0.70511334\n",
+ " 0.93631869 1.09873938 0.8298377 1.13219114 0.96074165 0.70836411\n",
+ " 0.81717877 0.93681523 0.86200076 0.93720661 0.81000767 0.93400173\n",
+ " 1.02848276 1.10122319 1.00569868 1.01669831 0.90273601 0.96579946\n",
+ " 0.56110175 0.87641592 0.96077639 1.198048 0.56502224 0.91334753\n",
+ " 0.46392885 0.8003465 0.96563718 0.92670587 0.94552031 0.9968988\n",
+ " 1.11027497 0.99240151 1.05084806 1.00386988 0.84580533 0.8390339\n",
+ " 1.02654363 0.9147707 1.09931415 1.02729013 0.98379606 1.150763\n",
+ " 1.19327609 0.91482492 0.93135086 0.64925219 1.10511593 0.86997062\n",
+ " 0.97376226 1.01923433 0.88503922 0.9455116 0.69023453 1.08579463\n",
+ " 1.11892574 0.91176808 0.99747274 0.88165637 1.16195827 0.93096989\n",
+ " 1.06974043 1.16113558 1.0889458 1.07351041 1.20063954 1.12683425\n",
+ " 0.97239608 1.22248048 1.20187668 1.03069703 0.90565416 1.01290694\n",
+ " 0.95699106 0.75252228 0.97335244 1.05768348 0.83087484 0.87892526\n",
+ " 1.01044125 0.92092891 0.98431736 0.70414992 1.22473795 1.04345974\n",
+ " 0.98736442 1.25925532 1.23203181 1.04584218 0.87131524 0.93970377\n",
+ " 0.86258367 0.92224477 0.94608036 1.02466252 1.0414659 1.01672579\n",
+ " 0.8289922 1.00909338 0.73982749 1.07790465 1.03660743 1.00930707\n",
+ " 1.18434106 0.96273017 0.71154956 0.88693425 1.05186138 1.24266013\n",
+ " 0.89988265 0.98277378 0.91887344 0.90927767 1.16103685 0.86872051\n",
+ " 1.20603789 1.14009558 1.00044432 1.12493823 0.88591544 1.00015528\n",
+ " 1.1333014 0.89949119 0.92780633 0.88619928 0.92469826 0.84752799\n",
+ " 0.81274509 0.72236557 0.85631052 0.98513816 0.90545158 0.95847087\n",
+ " 0.69600982 0.99807273 0.92991193 1.10141191 1.09119059 1.27737642\n",
+ " 0.89132212 1.05595663 1.02025448 0.82015451 0.73454542 0.90792156\n",
+ " 1.24091625 0.83978481 1.02305678 0.90162498 0.9191056 0.91504499\n",
+ " 0.78459253 0.74918244 0.96153998 0.81756089 0.92711034 0.83946778\n",
+ " 1.04898197 1.06791171 1.01173785 1.1269901 0.83430914 1.03448334\n",
+ " 1.07285949 1.01476408 0.82373434 0.82461459 1.10580022 1.00176251\n",
+ " 1.07294887 0.89258661 0.83536603 1.03263071 0.80102573 1.01887429\n",
+ " 0.99509323 0.85969717 0.93811553 0.82849231 0.99228274 0.97501169\n",
+ " 0.88329543 0.84270955 0.89739778 1.04762181 1.26460847 1.01089502\n",
+ " 1.07846523 1.2286828 1.12350657 1.16070574 1.11603206 1.16141438\n",
+ " 1.13822465 1.20912851 1.21936441 1.0770431 0.98094572 0.76027234\n",
+ " 0.80966056 1.18574133 0.92702767 1.16617825 1.33797189 1.06796747\n",
+ " 0.95890457 1.14422138 0.9723609 0.81272344 1.04506258 0.923107\n",
+ " 0.96438104 0.94144166 0.61132969 0.95025584 0.92340204 1.05109962\n",
+ " 0.91649787 0.93611124 0.99987965 1.07933297 0.96571552 0.83580764\n",
+ " 0.99250019 0.77488103 0.75294748 0.91164421 0.80239226 1.1285267\n",
+ " 1.07165725 1.12870814 0.92801381 1.22235412 1.19196182 0.95610004\n",
+ " 0.86140024 1.17373702 1.07709108 1.06669327 0.62821346 0.64157675\n",
+ " 0.64690112 1.02322703 0.75325086 0.9081749 0.91942439 0.93889173\n",
+ " 0.72908857 0.97963826 0.86427103 1.09352116 0.66712905 0.68176539\n",
+ " 0.89188979 0.80140292 0.42833517 0.90241119 0.97305315 1.0379237\n",
+ " 0.83332893 0.84282262 0.98976113 1.08149677 0.75939386 0.80612504\n",
+ " 1.14469373 1.06698947 1.15352081 0.88976082 0.98962925 1.0866615\n",
+ " 0.62453253 0.62897427 1.03858627 1.02272007 0.96603381 0.87818425\n",
+ " 0.90201579 0.8489842 1.24221042 1.03636471 0.89936046 0.62107697\n",
+ " 0.85662281 0.84166876 1.08351185 0.78318576 0.93560128 0.9823907\n",
+ " 0.98315868 0.79788953 0.85460683 0.86620276 0.98268212 0.95649977\n",
+ " 0.9441433 1.02038212 1.00121709 1.04167492 0.80050287 0.79724619\n",
+ " 0.65358291 0.84397532 1.04102503 0.860017 0.70427594 0.93523037\n",
+ " 0.98285865 0.94082113 0.94876811 1.08225523 0.93924591 0.65699445\n",
+ " 0.94709814 0.97692793 0.6634247 0.7704766 0.85389506 0.86278256\n",
+ " 0.80900481 0.84602752 0.77154793 0.87354668 0.87871923 0.84424871\n",
+ " 0.77064003 0.75335991 0.91117674 0.85454361 0.96827834 0.97673803\n",
+ " 0.85661908 1.00551129 0.72746672 0.65621899 0.72150498 0.85311281\n",
+ " 0.82152782 0.59890798 0.48932766 0.75121662 0.82344239 0.70734347\n",
+ " 0.7984398 0.47430001 0.72101164 0.70462178 0.57875548 0.84946097\n",
+ " 0.6865923 0.76073119 1.23225192 1.00343381 0.99084845 0.86849289\n",
+ " 0.78272645 0.48760116 0.35429222 0.80698988 0.72581087 0.41524584\n",
+ " 0.32238085 1.03558281 1.07087362 1.14821339 1.15382093 1.06582094\n",
+ " 0.97257575 1.05934412 1.04148047 0.93952419 1.13682602 1.12304585\n",
+ " 1.07665594 1.14737215 1.09105077 1.08893984 1.06117688 1.10570784\n",
+ " 1.06389163 0.87927196 0.96261583 0.83070419 1.04584175 0.83055773\n",
+ " 0.6631257 0.78156765 0.97618 1.03462887 0.92666629 0.98247521\n",
+ " 1.04704954 1.12961627 1.17864863 0.95675306 0.9824265 1.04215771\n",
+ " 0.63971859 1.17446935]\n",
+ "Distances (cosine): [0.9843836 0.64189458 0.90593079 0.79249187 0.66220272 0.94477823\n",
+ " 1.07966571 1.02839812 0.73932091 1.16446425 0.9566686 1.03778053\n",
+ " 1.0063827 1.21377877 0.66284692 1.0082713 0.97556555 0.52656377\n",
+ " 0.57850922 0.72423347 0.88460245 0.37235654 0.47205801 0.69014156\n",
+ " 0.97562761 1.14955377 1.14437032 1.2218012 1.25368152 1.12185805\n",
+ " 1.12432393 0.93577513 0.95293207 0.99020976 1.04840308 1.01781334\n",
+ " 1.08593222 0.97274349 1.12249192 0.9980201 1.0095202 0.95951341\n",
+ " 0.87168549 1.07886419 0.7031066 0.99943714 0.89798015 1.1010914\n",
+ " 0.99537304 1.01618712 0.58705232 1.05620562 0.50077789 0.58255489\n",
+ " 0.82086668 0.89000573 1.23077364 1.18570584 1.08559501 0.97264989\n",
+ " 1.02866667 1.15927942 0.97989779 0.99409706 1.06593524 1.01014897\n",
+ " 1.03675038 0.64835662 0.80866247 0.96832619 0.93246239 0.8973635\n",
+ " 1.13842324 1.00043224 1.08062984 1.01919734 1.10387608 1.01121687\n",
+ " 1.06932854 0.97332574 1.01364969 0.98024851 1.02712782 1.04413479\n",
+ " 1.07613066 0.99822492 1.17472228 1.13119282 0.9367471 1.20057992\n",
+ " 0.98377842 0.87806223 1.22230199 1.06360002 1.01410797 1.09743965\n",
+ " 0.79309421 1.00763369 0.58935065 1.05336568 0.8410245 0.78776487\n",
+ " 0.88248371 0.86184386 1.11962109 0.98990857 0.77192114 1.22500389\n",
+ " 0.84453 1.02013696 1.0556784 0.95420396 1.20658511 0.73529331\n",
+ " 0.57036819 0.8689323 0.5111902 1.01977806 1.24868239 1.10604913\n",
+ " 1.15433459 1.18435492 0.96226188 1.111127 1.22632863 0.79438884\n",
+ " 1.01590886 1.0031663 0.95096534 0.97223761 1.00104243 0.97741541\n",
+ " 0.86896093 0.99797549 0.89847764 1.16762214 1.19413817 0.83550321\n",
+ " 1.21142109 1.15562368 1.2676541 1.06724068 0.94236087 0.96609591\n",
+ " 1.08193639 1.03991504 0.65180306 0.98228905 1.04131332 1.20932155\n",
+ " 1.23185131 1.08652365 1.31822257 1.35050143 1.24590583 1.17828721\n",
+ " 1.11959092 1.16431423 1.14422474 1.13232935 1.07857799 1.04718823\n",
+ " 1.07551025 0.72773944 0.77541706 0.80834952 1.36396117 0.94218219\n",
+ " 1.22162312 1.10530331 1.14627518 1.13764938 1.0571938 0.77623045\n",
+ " 1.10878855 1.06758429 1.02900245 0.93668347 1.01881986 0.84680792\n",
+ " 0.93254506 0.90952951 0.8774964 0.85875556 0.82844085 1.07682099\n",
+ " 1.08129938 0.90533428 0.92649404 1.01456979 0.86685045 0.92299542\n",
+ " 0.93358202 0.83343163 0.96014196 0.79330087 0.92108871 0.80741422\n",
+ " 0.79263589 0.97605407 0.72508693 0.81616306 0.92128541 0.41635968\n",
+ " 0.44151244 0.55180216 1.02946865 1.05767777 1.11464884 0.95124177\n",
+ " 1.04443689 1.10460824 0.76077977 1.1264093 1.07911551 0.81222949\n",
+ " 0.87120135 0.95134161 0.84092628 0.85948464 0.81697379 1.10242688\n",
+ " 1.10211234 1.01333358 1.05114689 1.10152714 1.14737589 0.84494606\n",
+ " 0.90992254 1.01222933 0.94263248 0.91956159 0.67786354 1.20395035\n",
+ " 0.97697508 1.05404691 0.83978822 1.11741763 1.13767748 0.97652644\n",
+ " 0.91100251 0.56690178 1.02991411 0.7917062 1.00874637 1.23871541\n",
+ " 1.07177679 0.93677616 0.97564353 0.8507179 1.05803345 0.95607792\n",
+ " 0.84545519 0.9227359 0.9212579 0.91554157 0.83113876 1.1923238\n",
+ " 1.0696676 0.94417126 1.23954563 0.66025434 0.66371408 0.92933243\n",
+ " 1.05191405 0.70583156 0.77096309 0.92565032 0.89567264 1.16021345\n",
+ " 0.87717301 1.12691545 0.9539623 1.27023523 1.01716912 1.02398832\n",
+ " 0.69783536 0.99212493 0.90493951 1.08541575 0.88986987 1.12718312\n",
+ " 1.05789904 1.30849866 0.96479983 0.78907689 0.89783608 1.08820307\n",
+ " 1.05781154 0.92352531 0.87843075 0.88799135 0.69894404 1.04122657\n",
+ " 1.23422815 1.0412038 1.03411524 0.9525182 1.0389714 1.04381681\n",
+ " 0.88973071 1.16845719 0.77779044 1.05136145 1.04080579 1.03617468\n",
+ " 0.60952009 0.77576797 0.80625352 0.5104701 0.70350876 1.24059473\n",
+ " 0.96393193 0.6474535 0.7848625 0.70969958 0.65421422 0.85617924\n",
+ " 1.01282895 0.84334948 0.61030441 0.88309157 0.8843204 0.90002286\n",
+ " 0.77088033 0.91921854 0.98024783 0.9351283 1.18699458 1.26779642\n",
+ " 0.91634862 1.01337535 0.72435218 0.78541166 0.82291789 0.73472356\n",
+ " 0.87775186 1.05222362 0.76885186 0.99704827 0.90883699 0.89866107\n",
+ " 0.98895154 0.89422094 0.71589601 0.61528712 0.7406647 0.66479577\n",
+ " 0.6536945 0.72668448 0.65918091 0.98313019 1.01294371 1.00298639\n",
+ " 0.84315047 0.92624275 1.03541803 1.19693977 0.6962603 0.91367807\n",
+ " 0.60173728 0.85320189 1.03331591 0.95841739 1.02828298 1.08055231\n",
+ " 1.00971033 0.94513338 1.00809466 1.09412685 0.91625782 1.01063681\n",
+ " 1.14509843 1.07231933 1.14319765 1.06630079 0.87427445 1.15428864\n",
+ " 1.23779381 0.95138567 0.90741926 0.77010252 1.27657708 0.7329845\n",
+ " 0.91101229 1.01743997 0.82554836 0.93051701 0.72785781 1.0362052\n",
+ " 1.10469363 0.8502979 1.0135665 0.90547056 1.13742154 0.89444714\n",
+ " 0.94069295 0.94737478 1.12372953 0.9981688 1.01930019 0.95886181\n",
+ " 1.00038794 1.08091066 1.07327638 0.80932165 0.97448227 0.96139058\n",
+ " 0.85134366 0.81641544 0.90246415 0.86849652 1.02056207 0.81319095\n",
+ " 1.12291417 1.03096982 1.00572031 0.75541752 1.15559026 1.05746973\n",
+ " 0.97046561 1.07311871 1.03540914 0.93738694 0.91960416 0.91549344\n",
+ " 0.77142563 1.06414381 1.06645475 0.95528143 0.92393675 0.93751906\n",
+ " 0.91844055 0.94742418 0.83928196 0.79828877 0.90023393 0.91280385\n",
+ " 0.91748293 0.87544472 0.80097471 0.92333537 1.18813912 1.0975477\n",
+ " 1.02083086 1.07875334 0.93416027 1.00695775 1.04152021 0.98213238\n",
+ " 1.11999259 1.17064133 1.0133391 1.03358366 1.10498921 1.22231824\n",
+ " 1.11530428 0.92492925 0.60176275 0.59014792 0.89561103 0.99168126\n",
+ " 0.89557092 0.64489547 0.76785093 0.88603004 0.95860873 1.10609864\n",
+ " 0.67585931 1.07127985 0.88183997 1.11939602 1.09868306 1.2297142\n",
+ " 0.96960963 1.12692403 0.85206204 0.85361052 0.68326669 0.93073932\n",
+ " 1.14412584 1.04117489 0.86774485 0.91484424 0.95113871 0.85235129\n",
+ " 0.7908208 0.71875255 0.95647977 0.72807514 0.9303925 0.95089418\n",
+ " 1.08119579 1.09440353 1.07894049 1.05293919 0.94642214 0.83276288\n",
+ " 0.87746039 0.91792415 0.74408039 0.85948045 1.07132263 1.0275898\n",
+ " 1.14952088 0.80068487 0.91232118 0.96771368 0.80176459 0.95120046\n",
+ " 0.89215046 1.10175175 1.15415147 0.88742421 1.0765616 1.15348347\n",
+ " 1.01046585 0.9918624 1.09010237 1.07639014 1.22209062 0.95110557\n",
+ " 0.97332116 1.10179184 1.06165335 1.06004232 0.96643951 1.20377603\n",
+ " 1.27209199 1.27507605 1.21734571 1.20105465 1.00598862 1.01407634\n",
+ " 1.1024922 1.08476061 0.89385057 1.24350476 1.33614864 0.97768745\n",
+ " 0.99854881 1.20053354 1.03664058 1.02198765 1.1751063 0.93507605\n",
+ " 0.90496305 0.9113967 0.73267285 1.05790457 0.88084307 1.06244818\n",
+ " 0.94189695 1.04643744 1.04626054 1.12103184 0.86484108 0.83454725\n",
+ " 1.0875436 0.63742305 0.62041663 0.87994253 0.76856027 1.02443639\n",
+ " 1.00836862 0.95501685 0.95095894 1.04834397 1.03928442 1.02266042\n",
+ " 0.81323979 1.14047071 1.21070102 1.13473206 0.78075967 0.72336825\n",
+ " 0.66329469 0.87432563 0.78670005 0.86447706 0.80925242 0.91070465\n",
+ " 0.66784916 0.95176667 0.93927099 1.05362356 0.81598555 0.85979789\n",
+ " 0.85119325 1.01129707 0.59141727 0.91367738 1.01017776 1.17273021\n",
+ " 0.83761587 0.66803774 0.91196983 1.08026153 0.75582063 0.83068391\n",
+ " 1.17708562 1.16757662 1.14914777 0.9579413 0.79671284 1.12197871\n",
+ " 0.91173966 0.62519659 0.8890841 1.09345559 0.81607688 1.02506059\n",
+ " 0.83555397 0.95072444 1.05983872 1.01367652 0.90969718 0.73972481\n",
+ " 0.86668259 0.85281475 0.90405681 0.8145662 0.90461598 0.95171919\n",
+ " 1.05453061 0.96154925 0.94726478 0.82021558 1.11051102 1.09100204\n",
+ " 1.02486189 1.08631664 1.03762444 1.09353708 1.0579452 1.09413795\n",
+ " 0.56185933 0.809503 1.03033872 1.09057123 0.79190034 1.0861469\n",
+ " 1.06588143 0.9840204 1.11141795 1.02940047 0.9959645 0.83339356\n",
+ " 0.9637354 1.04161391 0.81159611 1.02444811 1.0224339 1.06902052\n",
+ " 1.14854207 1.08576482 1.05361037 1.11167726 1.06020578 1.14511333\n",
+ " 1.04934783 1.04952923 1.12036461 1.09039358 1.15533596 0.98584058\n",
+ " 0.82862253 1.03223004 0.93724356 0.92768615 0.95693477 0.74347306\n",
+ " 1.12405729 0.68981612 0.71261161 0.94167083 1.08521864 0.89939115\n",
+ " 0.96427403 0.69449424 0.89527541 0.98806704 0.82307468 1.19577153\n",
+ " 0.95781928 1.01084809 1.30869366 0.96608008 1.03841682 0.94995681\n",
+ " 0.45992677 0.59315619 0.69846593 0.86500162 0.38541379 0.48498631\n",
+ " 0.61918384 0.99942432 1.13856427 1.15599024 1.1070595 1.07840485\n",
+ " 1.03767312 1.06947302 1.03078803 1.20657479 1.12993119 0.91677506\n",
+ " 1.14409553 1.22381333 1.1946243 1.0626919 1.09643623 1.07376261\n",
+ " 1.12685922 0.88921523 0.93287532 0.71724748 0.97411937 0.63414431\n",
+ " 0.57780102 1.03370353 1.24375592 0.9236417 1.21003137 0.93991023\n",
+ " 1.13382357 1.03320155 1.15811684 0.98110558 0.9193943 0.98266731\n",
+ " 0.62870188 1.12300935]\n",
+ "Distances (cosine): [0.87690332 0.92317216 1.0425743 1.01004323 0.85344631 0.84774546\n",
+ " 1.05791853 1.11478178 0.82422152 0.94763475 0.9745045 1.13254928\n",
+ " 0.95943654 0.9896267 0.835505 0.99435745 0.26341436 0.78265025\n",
+ " 0.57642215 0.92727898 0.26484969 0.72579199 0.51594446 0.76698775\n",
+ " 0.76479389 1.00135408 1.04803815 1.13723827 1.16451173 1.16655497\n",
+ " 1.15770067 0.72637731 1.12441194 0.76742551 0.67117495 1.2020931\n",
+ " 1.12479107 1.15692161 0.99716345 0.95649667 0.86363399 0.98505575\n",
+ " 0.86761193 1.02134378 0.88653213 0.95724649 0.89437555 0.92806972\n",
+ " 0.93419474 1.13561531 0.60201109 0.89059517 0.63669616 0.94573762\n",
+ " 0.68344775 0.69579425 1.19732999 1.09690219 0.98203295 1.04265711\n",
+ " 1.14215032 1.23489304 0.91433629 0.805704 0.96087518 1.05956514\n",
+ " 0.82202733 0.82975769 0.9781637 0.99174646 0.93227987 1.05702294\n",
+ " 1.01889296 1.01467559 1.06097893 1.15348761 0.8826236 0.85669084\n",
+ " 0.60870645 1.03254867 0.87379885 0.85766081 0.79634073 0.66295546\n",
+ " 0.76443129 0.92905325 1.14913172 0.96918161 0.77950721 1.0268828\n",
+ " 0.93664399 0.64782367 0.79050822 0.98077493 1.02569014 0.7915314\n",
+ " 0.8089744 0.72330508 0.64791805 0.72337487 0.68797874 0.84070829\n",
+ " 0.79828483 0.86773295 1.11137128 0.89695017 0.62135514 0.9034587\n",
+ " 0.70089997 1.02695391 0.99622321 0.63896899 1.12708358 0.75094769\n",
+ " 0.59753181 0.79469656 0.61109364 0.78728662 0.77331129 1.0898665\n",
+ " 0.99349327 0.86693951 0.77446146 0.81924899 1.01934989 0.94236487\n",
+ " 0.94576703 0.79018552 0.91147566 0.73015892 0.74976755 0.73626589\n",
+ " 0.68901524 0.70080596 0.85143237 0.88036916 0.96068609 0.7893807\n",
+ " 1.03675925 0.97340899 0.86439062 1.09641065 0.98725001 0.65801885\n",
+ " 0.9458336 0.7961588 0.75427084 0.61799947 0.91924908 1.12511711\n",
+ " 1.06797077 0.96016192 0.95540977 0.80285499 0.84942986 1.20450548\n",
+ " 1.0201984 0.96395321 0.9631759 0.92062963 1.04146732 1.12242637\n",
+ " 0.97849005 0.85031071 1.03263935 1.08905424 1.04540113 1.09772885\n",
+ " 0.97162126 0.95683356 1.11230627 1.11875957 1.11644855 1.09779756\n",
+ " 0.92731834 0.80371262 0.75318826 1.09696501 0.77641174 0.89917148\n",
+ " 0.91159696 0.71259482 1.00284356 0.7366436 0.81354728 0.90174872\n",
+ " 1.20044 0.84155623 0.81703605 0.93485689 0.94617007 0.76627945\n",
+ " 0.88322145 0.96730846 0.77618514 1.00023434 1.11458712 1.01698237\n",
+ " 0.62710009 0.39911462 0.4619611 0.76364943 0.42092111 0.88328805\n",
+ " 0.66906405 0.88806082 1.21965492 1.12706117 0.7467624 0.85086964\n",
+ " 0.81257871 0.9430722 0.86244566 0.93295783 1.12285111 0.83449736\n",
+ " 0.91534863 0.91923288 1.05073488 0.90969685 0.81142773 1.27259883\n",
+ " 1.23676622 1.20261847 1.09322814 1.05981547 1.13607295 0.92979561\n",
+ " 0.82750346 0.76346154 0.81155805 0.77238385 0.78434612 1.06913608\n",
+ " 0.32301759 0.80589192 0.94838328 1.19496086 1.13776105 1.27495916\n",
+ " 1.200166 0.62990857 1.00523499 0.76959155 1.07183627 1.06220251\n",
+ " 0.90390376 0.80149597 0.90752828 0.94742703 0.86576441 0.99063707\n",
+ " 1.00094921 1.09652178 1.0619237 1.09423689 1.06596165 0.94649289\n",
+ " 1.04266602 0.85019426 0.95304358 0.72738928 0.7212436 0.67977618\n",
+ " 0.74871885 0.67316986 0.78910666 0.92365561 0.81629186 0.67962168\n",
+ " 0.93350766 1.15566132 1.04541209 1.12086324 1.02300564 1.22199361\n",
+ " 0.76721994 0.84023959 0.71466106 0.97022266 0.69942731 1.16690792\n",
+ " 1.10293717 1.07147337 1.06542137 1.12275698 1.16442995 0.72435854\n",
+ " 0.93140104 0.64620375 0.64690931 1.24263277 0.63875329 1.00663606\n",
+ " 0.95918781 0.81876694 0.86503838 0.85771607 0.91329075 0.91113259\n",
+ " 1.05829353 1.19187281 1.04925155 1.00953407 0.99584508 1.14119163\n",
+ " 0.72199764 0.8528871 0.43960865 0.61334877 1.08363804 0.99065808\n",
+ " 0.94134668 0.94560526 0.88357699 0.66733619 0.79132486 1.1870028\n",
+ " 1.04158419 1.05210641 0.6833739 1.21099477 0.95422214 0.90870254\n",
+ " 0.89418562 0.86462509 0.94662497 0.73427667 0.93220227 0.96667605\n",
+ " 1.0743047 0.80542906 1.0674335 0.99601692 1.00533342 1.01691356\n",
+ " 0.92236917 0.93969152 0.76356966 0.81105923 0.90311176 0.96821955\n",
+ " 1.02349221 0.9222341 0.85263295 0.85167775 0.91312021 0.90068203\n",
+ " 0.91096726 0.90713546 0.93385638 0.90212395 0.82319982 0.84064907\n",
+ " 0.96398139 0.99004118 1.05599377 0.93612722 0.47532773 0.84635026\n",
+ " 0.96394201 0.94589457 0.86988437 0.98898635 0.84546478 0.54910394\n",
+ " 0.87038368 0.7224472 0.94093337 0.81480473 0.55635164 0.9628762\n",
+ " 1.01198147 1.06236465 0.74350216 0.96503938 0.77122396 0.65357417\n",
+ " 0.78342463 0.79869559 1.10455939 0.99888694 0.62106453 0.83017652\n",
+ " 0.81299309 0.79810814 0.82263634 0.81150758 0.76990163 0.92188467\n",
+ " 0.89765486 0.65988882 0.57619615 0.86775375 0.60921966 0.57994597\n",
+ " 0.86847027 0.73488462 0.79678766 0.95175093 1.06580326 0.97055694\n",
+ " 0.84258348 0.90802306 0.95012052 0.80684013 1.01153979 1.13283347\n",
+ " 0.90457354 0.90922432 0.91214707 0.82302905 1.11611876 0.99856177\n",
+ " 1.10182915 0.976954 0.96392594 0.77701588 1.03345851 0.98745349\n",
+ " 1.08403244 1.17370996 1.09584785 1.05088665 1.00566409 0.85821945\n",
+ " 0.97959363 1.09946718 1.0542369 0.98482146 0.97513083 0.76632546\n",
+ " 0.97815744 0.78129417 0.97362777 1.03715023 0.97138203 1.01832681\n",
+ " 0.85269976 1.05485616 1.04152052 1.11115951 1.17633058 0.88280189\n",
+ " 1.11249105 1.07944241 1.08123364 0.91768877 1.06287553 1.19497083\n",
+ " 0.97266473 1.15115342 0.90833198 0.8071837 0.95783315 1.10748218\n",
+ " 1.07750718 1.03142902 0.92818649 0.95642888 0.90328747 1.10965506\n",
+ " 0.81579149 0.62228957 0.94446476 0.98751003 0.85155726 0.94207884\n",
+ " 0.87600769 0.90576798 0.96102863 0.99940886 1.03839776 1.00119487\n",
+ " 1.07990372 1.199895 0.65669287 0.95146196 0.7826262 1.02856173\n",
+ " 0.84298371 1.16002006 0.8575816 0.92135508 0.82156448 0.92916826\n",
+ " 1.03563443 0.84777377 0.77087883 0.7546757 0.82164484 0.80119746\n",
+ " 1.04559517 0.95358598 1.06369571 0.8966855 1.13041663 1.11089484\n",
+ " 1.10078121 1.15688725 0.74993792 0.74659025 0.98183839 1.03250932\n",
+ " 1.0474255 0.72552018 0.79118507 0.75977613 1.03464075 1.15753696\n",
+ " 0.92560585 1.0642587 1.07534222 0.78052277 0.93391381 1.22347534\n",
+ " 0.98229415 0.96892642 0.98153387 0.92891812 0.97706766 0.81114871\n",
+ " 0.88870776 0.93352461 1.10345809 0.86292989 0.77477245 1.11318783\n",
+ " 1.02210576 1.24904486 1.2102 1.1709875 0.98040439 1.04546482\n",
+ " 0.84193649 1.20207511 0.89676501 1.1846675 1.14353879 0.87470245\n",
+ " 0.89589876 1.22424656 0.978207 1.11042409 1.07964041 1.01872475\n",
+ " 0.87340834 0.781625 0.981001 0.87801525 0.82245312 0.94874581\n",
+ " 0.9327673 1.11923474 0.9453229 0.80534342 0.80294895 0.99245819\n",
+ " 0.81689325 0.80259435 0.69601662 0.84201884 0.72183549 0.93858045\n",
+ " 1.03532938 0.96894923 0.85832166 0.95246337 0.99838937 0.92699134\n",
+ " 1.06163427 0.87724479 0.92091255 0.70367245 0.92670097 0.59518325\n",
+ " 0.87726232 0.62793077 0.97201475 1.0715886 0.7934568 0.83930146\n",
+ " 0.87000832 1.05752424 1.14226832 1.1988814 0.79616412 0.82727091\n",
+ " 0.80329166 0.8707058 0.96394329 0.8110409 0.87715008 0.96189318\n",
+ " 0.85060154 0.90189729 0.93831211 0.96097526 0.9735498 1.04338571\n",
+ " 0.93009813 1.0821682 1.146727 0.75691052 0.72119961 1.06239313\n",
+ " 0.80914131 0.74985947 0.81031191 0.93276681 0.98332272 1.1377163\n",
+ " 0.81172144 0.82492761 0.81140181 0.8803619 0.84329988 0.9420492\n",
+ " 1.06099268 1.07570296 1.05184496 1.09196297 0.98125219 0.9130756\n",
+ " 1.02073677 1.0329944 1.15077694 0.85135127 1.06146554 1.11877369\n",
+ " 0.95533437 0.9547424 1.10839323 0.97151628 0.96805231 1.09339818\n",
+ " 0.58031016 1.07898664 1.09664186 0.9670847 0.925491 1.2002371\n",
+ " 1.15604225 1.13667383 1.22151273 1.05083122 0.98286264 0.91653363\n",
+ " 1.19388587 1.03960047 0.948397 1.10523817 1.12482324 1.05482468\n",
+ " 1.14557121 1.16403261 1.15585675 1.21223499 1.064555 1.16281828\n",
+ " 1.00785306 1.26268356 1.06275759 1.03860986 0.94149705 0.77292103\n",
+ " 0.80469667 0.73754905 0.98293335 1.0091822 0.90553896 0.85127832\n",
+ " 0.91397818 0.76767765 0.75768003 0.90627337 0.74472657 0.90086253\n",
+ " 0.89124741 0.88860119 0.89080734 1.13540797 0.83287743 0.80292313\n",
+ " 0.98837231 0.84125298 1.06301125 0.95461878 1.15299413 0.26237172\n",
+ " 0.75379072 0.55615359 0.76287212 0.26210771 0.71638969 0.59588792\n",
+ " 0.80032471 0.812189 0.96562687 1.07752524 1.00049483 1.00166179\n",
+ " 1.19862933 1.14695425 1.0239438 1.04264658 1.06432255 0.69139717\n",
+ " 0.87943712 1.07672026 0.73721459 0.9689589 1.15067512 1.04779745\n",
+ " 1.07526442 0.96899584 0.99448907 0.63236125 1.08408556 0.6104677\n",
+ " 0.7434803 0.99565699 1.03377629 0.94766023 1.03657986 0.7953639\n",
+ " 0.89971243 0.70413588 0.92634361 0.84282139 0.87024392 1.0335405\n",
+ " 0.77742679 0.98328883]\n",
+ "Distances (cosine): [0.85027926 0.86938012 1.03683104 0.93396593 0.8517932 0.94105315\n",
+ " 1.05185122 0.98357043 0.6889369 0.82534499 0.82756996 0.99155765\n",
+ " 0.8591207 0.95481632 0.72335077 1.16213324 0.39311616 0.85113179\n",
+ " 0.68181206 0.7110691 0.32714476 0.74950675 0.50597078 0.59148818\n",
+ " 1.13618308 1.0751477 1.08494275 1.15588561 1.21703612 1.09229737\n",
+ " 1.22772676 0.74541898 0.99537383 1.08301708 0.7719991 1.2673668\n",
+ " 1.29601781 1.09502218 0.92060473 0.95244188 0.9003239 1.03527477\n",
+ " 0.88353811 1.18839738 0.9782056 0.87005954 0.78291293 1.03204861\n",
+ " 1.05101714 1.12013283 0.63235195 1.15736129 0.57690555 0.71678475\n",
+ " 0.88331787 0.97702656 1.08756655 1.09786376 0.87406384 1.10102927\n",
+ " 1.10679952 1.23524452 1.13013882 0.91488497 1.03290282 0.99177589\n",
+ " 1.07098037 0.8866828 0.63595477 1.14710751 1.01764648 1.03670659\n",
+ " 1.05925665 1.16699974 1.14528347 1.20818163 0.8848487 0.77824417\n",
+ " 0.83458589 1.03717495 0.8338719 0.81858591 0.97475573 0.86293719\n",
+ " 0.99027256 0.98472132 1.06839995 0.76686355 1.07205268 1.13862054\n",
+ " 1.08742281 0.88902881 0.90157418 1.11888862 1.14182568 0.94743857\n",
+ " 0.97681878 0.75899276 0.6860931 0.86154734 0.74076582 0.85256853\n",
+ " 1.12834968 0.96745575 1.18487517 0.95941859 1.00375795 1.11513943\n",
+ " 0.60465444 1.08303481 0.8709872 1.01517412 1.12674414 0.65305199\n",
+ " 0.76860847 0.87617751 0.79768465 1.02952933 0.80883198 1.00838442\n",
+ " 1.05343604 0.8496953 0.85484933 0.89222036 1.05620511 0.90522922\n",
+ " 1.05477776 0.99812039 1.23255629 1.15360367 1.16457896 1.14415615\n",
+ " 0.93441402 1.13777546 1.076951 0.99928051 1.08488403 0.97635222\n",
+ " 1.00829312 0.93837662 0.96770957 1.23418307 1.03426783 0.85361792\n",
+ " 1.16321725 1.02621389 0.69063431 0.81572021 0.83985867 1.08783342\n",
+ " 1.0867341 0.8626893 1.1525388 1.09966724 1.16703542 1.05996631\n",
+ " 0.95968976 1.01634708 0.93089716 0.92166262 0.97848943 0.9477233\n",
+ " 0.92458992 0.78628398 0.81121962 0.86823849 1.11546237 0.9741085\n",
+ " 1.10063656 1.01523513 1.12429974 0.94522958 0.9109397 0.85137022\n",
+ " 0.91244166 0.83739388 0.9724517 0.88045476 0.95705168 0.63978677\n",
+ " 0.85534454 1.01368285 0.74003621 0.81096514 0.59718569 0.9891135\n",
+ " 1.15483858 0.94546531 0.67865811 0.77638446 0.83900927 0.6044996\n",
+ " 0.81423667 1.08989162 0.84878913 0.98037734 1.0660677 0.95228607\n",
+ " 0.74551822 0.42819857 0.5576637 0.63572994 0.34702285 0.7266914\n",
+ " 0.44258671 0.56388731 1.14961341 1.03713846 0.85068439 0.76216518\n",
+ " 0.72275349 1.04865963 0.72335149 0.88473588 1.10212243 0.99281162\n",
+ " 0.96660555 0.89133524 1.00696983 0.97575266 0.85100919 1.06082798\n",
+ " 1.14323778 1.00592128 1.11127407 1.0889305 1.18066474 0.97920373\n",
+ " 0.97855612 0.98374869 0.80052119 1.1310446 0.59916008 1.1188179\n",
+ " 0.57081713 0.90736488 0.84585526 1.13490855 1.09712425 1.29729615\n",
+ " 1.24281219 0.71446248 1.06192263 0.89072942 1.2140021 1.10342741\n",
+ " 0.99146013 1.23824971 0.91822548 0.70005851 0.71270316 1.09665363\n",
+ " 0.73310157 1.08179519 1.05067207 1.10393656 0.91482319 1.02106412\n",
+ " 0.94810558 1.00711245 1.02297042 0.68378228 0.80749057 0.88823184\n",
+ " 0.83499626 0.74388235 0.77309171 0.96262951 0.95852926 0.93513562\n",
+ " 0.835913 1.04542186 1.13393442 0.96653228 1.05198983 1.04425016\n",
+ " 0.55653433 0.93147509 0.80912184 0.70413703 0.64467075 1.20363512\n",
+ " 1.03791868 1.12699871 0.94457998 0.95038199 0.94620442 0.74093163\n",
+ " 1.00299753 0.99193895 0.9975173 1.19284892 0.85770929 1.11650213\n",
+ " 1.0066536 0.93668807 0.6704685 0.93389572 1.02309013 0.81718488\n",
+ " 0.88154057 1.23801894 0.92456003 0.86186722 0.77331893 0.9939457\n",
+ " 0.64174372 0.72377115 0.43626118 0.59091922 0.90293833 1.07042179\n",
+ " 0.8379341 0.75709428 0.96890438 0.87841952 0.9071956 1.03903105\n",
+ " 0.97887195 1.01157404 0.71607757 1.14711585 0.99643463 0.9160738\n",
+ " 0.9353228 0.76006652 0.76936986 0.77566505 0.98197923 0.98231696\n",
+ " 1.0087327 1.00753523 0.97618373 1.02026899 0.93817117 0.90539471\n",
+ " 1.01906619 1.13624189 0.81406669 1.12513335 1.04411064 0.88813539\n",
+ " 0.85213981 0.83084233 0.7175158 0.83277285 0.8038537 0.72851699\n",
+ " 0.95856793 0.90199422 0.98021475 0.7022062 0.7032129 1.05965112\n",
+ " 0.72680256 0.83988739 0.95099984 0.98674743 0.37678638 0.97116994\n",
+ " 0.74590307 0.84577326 0.93745513 0.96039029 0.68139275 0.56277253\n",
+ " 0.74609263 0.64179324 0.74799084 0.63941424 0.3360463 0.7319595\n",
+ " 0.9550492 1.01735961 0.92800106 0.87212077 0.85984275 0.63335458\n",
+ " 0.80417072 1.04066257 1.06743627 0.76014011 0.68453769 0.76493058\n",
+ " 0.83685836 0.85757753 0.79520803 0.90836483 0.70692402 1.08913506\n",
+ " 1.06378164 0.71296864 0.5318079 0.88301624 0.85924498 0.66850444\n",
+ " 1.1328257 0.98739837 0.96280933 0.97722016 1.12476375 1.05816086\n",
+ " 0.855959 1.08532668 1.07847039 0.82852503 0.87629673 1.07277836\n",
+ " 1.00826189 0.68605534 1.16066891 1.0873077 1.03248433 1.07966462\n",
+ " 1.02193057 0.87083919 1.07382113 0.65354791 1.15822648 0.84640656\n",
+ " 0.99107444 1.11825103 1.08210912 1.01025104 1.09530516 0.80685421\n",
+ " 1.01924788 1.07529628 0.89897595 0.98772309 0.97264781 0.86259672\n",
+ " 0.98748185 0.87393031 0.82504837 1.0270059 1.01653905 1.04659229\n",
+ " 1.0734658 1.04216928 0.84955863 1.00619812 1.05350906 1.08560793\n",
+ " 0.89940555 0.94052923 0.8351545 0.88200004 1.06095702 0.95970161\n",
+ " 1.07798487 1.0716473 1.00131074 0.942993 0.88192364 0.89124349\n",
+ " 1.05945407 0.81093137 0.83879878 0.95733917 0.99515782 0.92032881\n",
+ " 0.8921864 0.78552956 0.85329483 0.98895064 0.74702144 0.81640171\n",
+ " 0.85908358 0.90393817 0.97537186 0.93114848 0.89509852 0.98623485\n",
+ " 0.92585133 1.20349028 0.7323127 0.74032764 0.7564327 1.01443265\n",
+ " 0.97885389 0.97989887 0.90224117 0.86856419 0.82330849 0.83748068\n",
+ " 0.87422685 0.64199869 0.7872544 0.78728476 0.83714168 0.69140304\n",
+ " 1.06532385 0.96316269 0.94815467 0.89910071 0.93332216 1.29074759\n",
+ " 1.32872837 1.11016799 0.77276424 0.80032956 1.1318285 0.97072455\n",
+ " 0.99090964 0.87192794 0.90485336 0.89851838 0.93839196 1.16214866\n",
+ " 0.97348263 0.83532783 0.94960877 0.93747749 0.92744469 1.13610403\n",
+ " 0.88025365 0.83481303 0.90118769 0.78969614 1.04819635 0.92573888\n",
+ " 1.00078029 0.96802616 1.14078596 0.99972058 1.06267382 1.1191855\n",
+ " 1.10333854 1.21054898 1.23412189 1.13198852 0.86667811 0.87345249\n",
+ " 0.63608226 1.23648673 1.12384691 1.123744 1.14385893 0.96818703\n",
+ " 1.03337979 1.24278098 0.90601802 0.86946576 1.06703089 1.13886802\n",
+ " 1.01091832 0.927081 0.77625446 1.01827333 1.0899561 0.96763183\n",
+ " 1.05111291 1.0969471 1.02208003 0.9545395 0.84521509 0.95792995\n",
+ " 0.94820432 0.79963067 0.58647168 0.79589575 0.79506977 1.04827178\n",
+ " 1.17253642 1.05111677 0.84953223 1.11923266 1.11159929 0.92045392\n",
+ " 0.9396734 0.96776654 0.9906914 0.88226451 0.84051188 0.60937509\n",
+ " 0.8012998 0.95082409 0.88211756 0.98664359 0.90602397 1.03055466\n",
+ " 0.87489391 1.08895224 1.03727143 1.12762453 0.63857225 0.66155415\n",
+ " 0.97221054 0.71956446 0.75967009 0.86888564 0.90443239 1.0694059\n",
+ " 0.87587347 0.90670481 0.90588247 1.04795408 0.93420838 1.04887284\n",
+ " 0.95888011 1.06213087 1.2170206 0.80679429 0.95660671 1.00914037\n",
+ " 0.72573698 0.71583814 0.98074695 0.91274656 1.13978346 1.00785996\n",
+ " 0.84066635 0.85408688 0.98270181 0.87417911 0.97970217 0.8135024\n",
+ " 0.96901914 0.96800944 1.10420551 1.00441832 0.88336196 0.84259626\n",
+ " 1.02103809 0.9867404 0.83924652 0.9282772 1.01569183 0.99045284\n",
+ " 0.86532125 1.07132457 1.11916042 0.92409006 0.7869966 0.97967815\n",
+ " 0.60227139 1.13119105 1.17068637 0.83197875 0.83291981 1.07752283\n",
+ " 1.14591205 1.10275467 1.11604747 1.17261929 0.81830111 0.7341991\n",
+ " 1.14899489 1.00894662 0.78401169 0.94329404 0.98301021 0.85541349\n",
+ " 0.93422613 0.92167027 0.81380916 1.04419489 0.93667762 0.88094109\n",
+ " 0.71034521 1.01978941 1.01477568 0.92116888 0.95290002 0.74605114\n",
+ " 0.99900057 0.95165538 0.87291955 0.79672869 0.78111824 0.81800149\n",
+ " 0.81958085 0.70492237 0.65028099 0.68315842 0.62790367 0.84643214\n",
+ " 0.82799433 0.63289467 0.83760637 0.84406011 0.62671952 0.73126021\n",
+ " 0.91210069 0.77836218 1.17361277 1.05446476 1.17624901 0.34612718\n",
+ " 0.81701362 0.67759816 0.62809201 0.28863132 0.67678309 0.63434821\n",
+ " 0.62324841 1.21769907 0.98682902 1.19200534 1.14025451 1.09499516\n",
+ " 1.10267349 1.09841592 1.04738598 1.01494056 1.07195474 0.78843476\n",
+ " 0.94945012 1.03111514 0.72727668 1.1561826 0.99818285 0.95194042\n",
+ " 1.09277246 0.86474025 0.98911328 0.84479153 1.04824791 0.94429402\n",
+ " 0.67354386 0.72554871 1.03012611 1.04437591 1.01156754 1.03233104\n",
+ " 1.06340766 0.79064531 0.96804192 0.88084698 0.91620912 0.99090477\n",
+ " 0.60439413 0.97364667]\n",
+ "Distances (cosine): [0.9640993 0.79699994 1.09986502 0.84237682 0.84863022 0.93837033\n",
+ " 1.07841196 1.02805957 0.69520493 0.97629205 0.90331336 0.99732104\n",
+ " 0.96492661 1.11886996 0.75796474 1.09612358 0.68740512 0.7584465\n",
+ " 0.34887749 0.44535615 0.64352897 0.62966815 0.23350003 0.30958678\n",
+ " 0.94908845 1.15184299 1.10352169 1.23195209 1.14882867 1.09639136\n",
+ " 1.13692662 0.76488881 0.92817093 1.03795404 0.97331036 1.15739918\n",
+ " 1.22457966 0.95253103 0.94298229 0.90288162 0.85286628 1.04495653\n",
+ " 0.80403952 1.10766102 0.94633592 1.19325061 1.10003179 1.13188064\n",
+ " 1.08753891 1.20071959 0.37292459 1.09508229 0.33690319 0.72906508\n",
+ " 0.78767758 0.98321252 1.11290476 1.0548636 1.05271729 1.07018568\n",
+ " 1.1633898 1.29527216 1.11920955 1.10961616 1.19010542 1.00566022\n",
+ " 0.99471763 0.73975335 0.63713328 1.0880376 1.10704951 1.02651322\n",
+ " 0.99297676 1.18502407 1.03377546 1.02001939 1.07396828 0.92303849\n",
+ " 0.94784728 0.88795639 0.94458516 0.75313719 1.0858257 0.85260693\n",
+ " 0.90548389 0.98460732 1.04576865 0.80823235 0.87124624 1.0509202\n",
+ " 0.99189713 0.83857197 0.89124998 1.10615267 1.09193806 1.06307056\n",
+ " 1.04722817 1.04819679 0.70297256 1.084382 0.84727173 0.82036039\n",
+ " 1.0337954 1.01200552 1.16488638 0.88649608 0.91055832 1.12887916\n",
+ " 0.57079833 1.05737377 0.75267376 1.06887253 1.16029955 0.67905778\n",
+ " 0.46825468 0.9157126 0.42191851 1.03705668 1.02243457 1.17912015\n",
+ " 1.23426381 1.01052644 0.81240271 0.95498956 0.99096624 0.85801257\n",
+ " 0.97214744 0.96583189 1.17991593 1.10431824 1.05618854 1.08910781\n",
+ " 1.02159446 1.12260079 1.07865804 1.09036334 1.22520778 0.71964885\n",
+ " 0.8750669 0.98034175 1.01458896 1.202222 0.93936417 1.03569487\n",
+ " 1.12396458 1.09177669 0.76199239 1.06239105 0.89514679 1.05428316\n",
+ " 1.05698992 1.0199024 1.07435772 1.14019548 1.08265623 0.97801383\n",
+ " 0.96752076 0.93965086 1.03564324 1.00605766 1.03696195 0.97583326\n",
+ " 0.97335048 0.60813266 0.89208176 0.944989 1.07530572 0.83897213\n",
+ " 1.14577609 0.96248398 0.94913484 0.85172507 0.71530978 0.95444484\n",
+ " 1.02891525 0.90347319 1.05056097 0.85354203 1.09431545 0.77921213\n",
+ " 0.86364651 0.93009692 0.69878816 0.87577969 0.6576698 1.0650245\n",
+ " 1.2559693 0.92221451 0.83708187 0.91825058 0.95049304 0.90203442\n",
+ " 0.98929249 1.01642476 0.88054962 0.96162952 1.06976663 0.91925555\n",
+ " 0.89573795 0.81570876 0.63585618 0.42818188 0.71096397 0.7633811\n",
+ " 0.23628287 0.33633961 0.96959382 1.17431892 1.05050824 1.01096852\n",
+ " 1.06813021 1.02451246 0.87753014 0.97595722 1.19693314 0.80172812\n",
+ " 0.90166816 0.89356936 0.86327427 0.98003599 0.90030543 1.03032\n",
+ " 0.99233638 0.92171781 1.0104735 1.02963288 1.15656196 1.07644942\n",
+ " 0.98363459 1.0491147 0.8407148 1.06748866 0.34870545 1.17381653\n",
+ " 0.87717743 1.01919614 0.68826273 1.29322726 1.27832372 1.28541115\n",
+ " 1.19436095 0.3576905 1.04427175 0.81747915 1.08656433 1.12511809\n",
+ " 0.96842095 1.14545219 0.95497139 0.74183274 0.83482312 1.06216532\n",
+ " 0.77723301 1.02151839 1.02799237 1.0244516 0.9772488 1.17553896\n",
+ " 0.9053147 1.21626279 1.21858605 0.71251691 0.84843965 1.05079004\n",
+ " 1.09386599 0.95550006 0.85665361 1.12551201 1.12396114 1.09933953\n",
+ " 0.70139866 1.05643536 1.21422624 0.89690958 1.07005488 1.04275651\n",
+ " 0.81334154 0.92944535 0.86535959 0.5304813 0.77489586 1.05440964\n",
+ " 1.04132413 1.18294092 0.92936784 1.09856545 0.93011821 0.994987\n",
+ " 0.99293771 1.01832635 1.0323814 1.03651737 0.8162463 1.03445635\n",
+ " 1.0393354 0.96332485 0.8802862 0.89416538 0.88921979 0.84756707\n",
+ " 0.78235454 1.21479542 0.99747125 0.83240989 0.89295588 1.11347277\n",
+ " 0.72021878 0.92478823 0.73228249 0.27720932 0.8549848 1.04681009\n",
+ " 0.88847747 0.79264941 0.81604479 0.85317487 0.57857088 1.09034429\n",
+ " 1.03324891 1.00293259 0.49073483 1.02732309 0.99081297 0.96343451\n",
+ " 0.96242482 0.92416716 0.79107065 0.92786569 0.9245141 0.92647353\n",
+ " 0.7839101 0.91757127 0.86634149 0.84181649 0.96414228 0.74094544\n",
+ " 0.97267163 1.11905111 0.89687418 1.17734929 1.082337 0.80906485\n",
+ " 0.87124504 0.96182753 0.84841982 0.86389783 0.89584251 0.85475054\n",
+ " 0.9993762 0.96787098 0.95975712 0.89336805 0.87099456 0.95606513\n",
+ " 0.70608644 0.91101126 0.94630152 1.04581005 0.35622075 0.96273664\n",
+ " 0.74447761 0.73348279 1.06314809 0.96768244 0.88580677 0.92799334\n",
+ " 0.83054254 0.83837722 0.97873592 1.06848373 0.62536938 0.84208893\n",
+ " 0.99473204 0.90404592 1.0286489 1.03058003 0.93724164 0.91358049\n",
+ " 1.09374484 0.87907219 1.01416603 0.7857285 0.97385777 0.76574927\n",
+ " 0.89758594 0.89429332 0.83086945 0.89082861 0.71391764 0.98994632\n",
+ " 1.15336246 0.86700479 0.79451002 1.00507003 1.08453869 0.77503003\n",
+ " 0.99681163 1.02318083 0.93513651 0.98474194 1.11328412 1.06592778\n",
+ " 0.8202716 1.08997623 1.04944714 0.85613897 0.72320502 1.02313871\n",
+ " 0.89752921 0.70365912 1.01391808 0.99938188 1.02504636 1.11781768\n",
+ " 0.92621503 0.90039325 0.909878 0.63369778 0.98562951 0.80162692\n",
+ " 0.92099258 1.17710141 1.13750494 1.00285106 1.0756449 0.93328874\n",
+ " 0.99396461 0.95127048 0.92189007 1.02876974 1.03610349 0.91473701\n",
+ " 0.92835777 0.91267457 0.71710206 1.13870882 1.11193386 1.11839091\n",
+ " 1.21451225 1.04275482 0.81247145 0.96262034 1.21395455 1.26878367\n",
+ " 0.97418671 1.062595 0.99820081 0.89072411 1.11190141 0.91461111\n",
+ " 1.26343646 1.10436252 0.97953956 1.01693443 0.85258903 0.98794351\n",
+ " 1.13485159 0.89906035 0.94800102 0.95300161 0.92048665 0.90974068\n",
+ " 0.78440614 0.68553956 1.00860055 1.06461087 0.99009641 1.02069526\n",
+ " 0.94848482 0.93305782 1.0160739 0.95378741 1.03891594 1.07107594\n",
+ " 0.95954109 1.04979115 0.88626582 0.81950732 0.91569664 0.91022483\n",
+ " 1.05428021 0.73438711 1.00966479 0.86919406 0.78073844 0.87081939\n",
+ " 0.86333941 0.77837867 0.77680106 0.73603365 0.76148676 0.96805507\n",
+ " 1.1711913 1.13495742 1.12996091 1.23133273 0.92199168 1.2644181\n",
+ " 1.28730885 1.16867763 0.85377384 0.94842568 1.12345744 0.94942429\n",
+ " 0.94874334 0.83464591 0.96142326 1.06801144 0.91741855 1.12269446\n",
+ " 0.960189 0.79924978 0.91201278 0.77009228 0.92138468 1.00942213\n",
+ " 0.81055642 0.81136672 0.89385485 1.00787732 1.20463405 0.96991912\n",
+ " 1.03020148 1.02693635 1.12062703 0.99159575 0.96353349 1.18204009\n",
+ " 1.08260574 1.14901338 1.20625337 1.0722351 0.86871404 0.70398938\n",
+ " 0.65695904 1.12824584 0.910812 1.14592796 1.20287737 1.00705008\n",
+ " 1.00627192 1.12926463 0.90434841 0.70577964 1.08313163 1.04051603\n",
+ " 0.92663498 0.90243345 0.61046286 0.99511666 0.92499561 0.92649286\n",
+ " 0.98553772 0.96679946 0.9958036 0.97338856 0.80602442 0.92819\n",
+ " 0.97611385 0.75887914 0.78937502 0.97336985 0.91134288 1.05322354\n",
+ " 1.0218551 1.07854823 0.80382818 1.13486529 1.07819458 0.96523802\n",
+ " 0.9048751 0.98399547 0.96316543 0.97669869 0.74715758 0.79513391\n",
+ " 0.81415386 1.04312258 0.83303872 0.91564231 0.76986244 0.96587564\n",
+ " 0.81041811 0.98550269 0.88319786 1.16827758 0.73850472 0.74403601\n",
+ " 1.05718927 0.88414818 0.73391478 0.8438455 0.84416384 1.07346636\n",
+ " 0.90440824 0.9727104 1.00622954 0.99081652 0.80908583 0.83936706\n",
+ " 1.03767102 1.02146814 1.03838392 0.94078895 0.97048102 1.02162025\n",
+ " 0.64856801 0.68504973 1.00707922 0.98771797 0.91131966 0.9493894\n",
+ " 0.80177702 0.90394974 1.09360264 0.86900394 0.9006126 0.6995088\n",
+ " 0.7493072 0.74866107 1.14769184 0.74361704 0.96530201 0.87913855\n",
+ " 1.02019751 0.88891139 0.89556652 0.89441066 0.97573306 0.92274332\n",
+ " 0.85920736 1.00772964 1.01175937 0.97171441 0.72014826 0.7695259\n",
+ " 0.60049885 0.95822193 0.98071364 0.74108661 0.83062638 1.0133477\n",
+ " 0.98314087 0.93432704 1.16874278 0.99910121 0.99576993 0.71946615\n",
+ " 1.03072026 0.92775501 0.77934698 0.85503733 0.9107281 0.96024453\n",
+ " 0.85137667 0.71487427 0.69744085 0.95802644 0.81478367 0.78537957\n",
+ " 0.62050026 0.84240748 0.9084148 0.88880322 0.90758798 0.8570286\n",
+ " 0.78055429 0.88930972 0.74096005 0.60059808 0.69333365 0.6608848\n",
+ " 0.72040725 0.62396579 0.43073537 0.65759007 0.6958151 0.74786208\n",
+ " 0.76273381 0.62464676 0.80341517 0.79459691 0.52198217 0.68808826\n",
+ " 0.6772867 0.78064569 1.20129983 1.07989404 1.11822884 0.64806325\n",
+ " 0.69265845 0.32592226 0.31832404 0.5987124 0.65226267 0.28394492\n",
+ " 0.31943969 0.93136601 0.93841555 1.04858979 1.03495732 1.01500057\n",
+ " 0.91079287 0.90597223 0.9635261 0.90323389 1.18402958 1.09913312\n",
+ " 0.9672969 1.01206062 0.85547377 1.05430686 1.01164409 0.99171344\n",
+ " 1.21710651 0.77154524 0.93519933 0.80349673 1.06023912 0.77811318\n",
+ " 0.70774461 0.6843138 0.91070058 1.09019953 0.87468856 1.04214032\n",
+ " 1.0467724 0.93796523 0.96441884 0.87215196 0.92677263 0.96878135\n",
+ " 0.67977902 1.09698215]\n",
+ "Distances (cosine): [0.79908111 0.73927687 1.06708251 0.85273174 0.73477776 0.97931733\n",
+ " 1.15717008 1.04689102 0.76079206 0.95669637 0.8625687 0.96083231\n",
+ " 0.93360211 1.16530425 0.80655799 0.95865983 0.84671028 0.87091818\n",
+ " 0.38244526 0.78497146 0.83519839 0.80662462 0.50979546 0.71074501\n",
+ " 0.71153826 1.04650101 1.0649136 1.11200005 1.1937854 1.01285918\n",
+ " 0.97267796 0.88781235 1.11234429 0.98895468 0.85436357 0.88559368\n",
+ " 0.89954621 0.96007104 0.7897397 0.98097446 0.80211829 0.87353476\n",
+ " 0.84895218 0.85481954 0.876547 1.12109919 0.97923431 1.09625631\n",
+ " 1.15711111 1.11522427 0.64456604 1.09841815 0.66604295 0.81208979\n",
+ " 0.84036416 0.77816837 1.1547357 1.10989079 0.9876886 0.66489987\n",
+ " 0.86905466 1.07481958 0.94426439 0.81386628 0.92069161 0.98126827\n",
+ " 0.83084011 0.84017016 1.0572067 0.86548504 0.87353432 0.93402167\n",
+ " 0.95065681 0.85075874 1.04778706 1.00473324 0.8759625 0.84068109\n",
+ " 1.02038454 0.79316769 0.90681227 1.12942827 1.00747079 0.93714305\n",
+ " 0.95206313 1.01678202 1.09657377 1.05726545 0.86026982 0.98790538\n",
+ " 1.0157762 0.88152324 0.90006525 0.93870305 1.01315261 1.20427408\n",
+ " 0.85696882 0.9616036 0.70567474 1.11066946 0.91650548 1.04384959\n",
+ " 1.01096906 1.10939046 1.12584293 0.72615414 0.70119282 0.91918429\n",
+ " 0.95433439 0.90014563 0.97574681 1.00754574 1.2220121 0.84882906\n",
+ " 0.55364322 1.01385129 0.62728755 0.99663488 1.06725993 1.04393421\n",
+ " 1.14147949 1.17752329 0.89359901 0.94216866 1.11553475 1.10094536\n",
+ " 1.05861669 0.99963173 0.95077919 0.96474281 0.95209713 0.96762571\n",
+ " 0.84017964 0.94947213 0.97640498 0.85818313 0.91695714 0.74926454\n",
+ " 1.1055307 0.99550005 0.89494416 0.8502504 1.07676681 0.97099619\n",
+ " 0.73644759 0.72589955 0.7679438 0.84877673 1.03543086 1.04647861\n",
+ " 1.11674486 1.05081578 1.13449978 1.06833339 1.10528614 1.07234597\n",
+ " 0.99815522 0.89495853 0.96159561 0.90249731 1.00699271 1.09662969\n",
+ " 0.93795777 0.81279839 0.85128067 0.87188531 1.19001346 0.92097311\n",
+ " 1.14034961 0.99351593 1.00656554 1.01699075 0.98305181 0.94896057\n",
+ " 0.97632637 1.01067973 0.9091754 0.89769507 1.00432221 1.05588188\n",
+ " 1.12076591 0.88723504 1.08645919 0.88562836 0.85784169 1.11320863\n",
+ " 1.06126668 1.01341861 0.96807518 1.08298973 0.99437477 0.93235862\n",
+ " 1.01337308 0.97605538 0.83756043 0.75838728 0.98120539 0.70363846\n",
+ " 0.8458801 0.92780311 0.65682844 0.80503096 0.93150785 1.04293512\n",
+ " 0.68522719 0.82562939 1.11108932 1.11014248 0.75362199 0.97349169\n",
+ " 1.07496052 1.02851022 0.88174533 1.03125071 1.05449818 0.71758938\n",
+ " 0.79159791 1.00759128 0.87518147 0.98801904 0.90998367 1.09813142\n",
+ " 1.03657542 0.95999973 1.00392549 1.08573047 0.99440003 1.03081182\n",
+ " 0.78812237 1.01927 0.77064493 0.95279206 0.72252125 1.02399697\n",
+ " 0.74197884 0.97826187 0.89888565 0.99355318 0.96201307 0.96652534\n",
+ " 0.94184268 0.25396192 0.7493765 0.90807884 0.70809421 0.94178418\n",
+ " 0.97511678 0.80634451 0.98581152 1.06072993 1.01005566 0.94320242\n",
+ " 1.02337968 0.64439638 0.64769702 0.60513261 0.7555751 0.94128801\n",
+ " 0.99187264 0.86386565 1.07299225 0.90880302 0.77923438 0.94077548\n",
+ " 0.88673337 0.7281395 0.82725276 0.98710909 0.86437454 1.04571151\n",
+ " 0.79312343 0.96648613 0.96167187 1.09885804 0.77093196 1.16519824\n",
+ " 0.87990819 0.83667481 0.80933533 1.07361809 0.97240444 0.89642668\n",
+ " 1.06981224 1.30089778 1.05820417 0.99594716 0.96550001 0.89359601\n",
+ " 1.05851306 0.91861905 0.90424047 0.99563761 0.66308237 1.0520874\n",
+ " 0.93400518 1.1609704 0.97295296 0.73532681 0.881064 1.05832384\n",
+ " 1.05485217 1.0342685 0.88448353 0.9726835 1.05777027 0.93390591\n",
+ " 0.78788859 0.89349423 0.74393231 0.53463579 0.69697003 0.95201781\n",
+ " 0.76087283 0.71435445 0.78956002 0.40986545 0.55243629 0.81466177\n",
+ " 0.95964474 0.87679209 0.51171364 0.96375923 1.00546129 1.00787475\n",
+ " 0.81722586 0.88161951 1.06886271 0.7655628 1.09495965 1.05379447\n",
+ " 0.99277356 1.00476895 0.88036287 0.88438476 0.86537457 0.8261744\n",
+ " 0.9446573 0.73611498 0.88303315 0.97983821 0.92940804 0.91261826\n",
+ " 0.83367175 0.98961829 0.95246502 0.96717629 0.92708615 1.05043092\n",
+ " 1.01433278 1.1678357 0.9636643 1.11426713 0.77385638 0.70012296\n",
+ " 0.75033189 0.82984885 0.93324737 1.04789456 0.61246075 0.88989631\n",
+ " 0.82865582 0.92108745 0.93318505 0.88514942 1.11284032 0.90753186\n",
+ " 1.05065392 1.09176979 1.11070994 1.16162725 0.98366305 1.0839678\n",
+ " 1.1932071 1.11767584 1.03593003 1.05563643 0.89452448 1.24350825\n",
+ " 1.1779094 0.71477341 1.03844376 0.8602801 1.10357657 0.96775181\n",
+ " 1.03087216 0.98334745 1.05016065 0.92747637 0.82638633 1.02746375\n",
+ " 0.92436155 0.80349123 0.8313303 0.96193335 1.08843484 0.64826834\n",
+ " 0.99841496 0.83824504 0.8123247 1.01839856 1.15155453 1.04880872\n",
+ " 1.01510452 1.07802569 1.14784751 0.99891638 1.09521153 1.11986649\n",
+ " 1.06713559 1.05363791 0.90810599 0.83434853 0.8107911 0.94172835\n",
+ " 0.9896609 0.9982981 0.95329301 0.91994077 0.99442741 0.96535035\n",
+ " 1.03421622 1.13986887 1.06472476 0.99306289 0.95366658 1.00997007\n",
+ " 0.9306631 1.01658451 1.09833789 1.1099149 0.99630483 0.86665176\n",
+ " 0.95058076 0.90166695 0.99837408 1.12124232 1.0524317 1.0464936\n",
+ " 1.06885405 1.02390015 0.94150143 0.92480506 1.07705303 1.077649\n",
+ " 0.92056932 0.92294082 0.99023854 0.95974531 1.10811909 0.96900426\n",
+ " 1.17729322 1.16700669 0.99470372 1.07160378 0.9825647 1.14979498\n",
+ " 1.27901756 1.04627501 1.12532388 1.01628432 1.03325062 1.16245798\n",
+ " 0.87847854 0.67649062 0.80839075 0.90618508 0.95748419 1.043942\n",
+ " 0.90106258 1.03349252 1.05486578 1.03720413 1.22440163 1.20915921\n",
+ " 0.91748325 1.01570589 0.91055753 1.06042773 0.82935107 0.83913135\n",
+ " 1.23908915 1.19224025 1.08792936 1.04037044 0.94311693 1.11121802\n",
+ " 1.00663387 0.84066813 0.9767006 0.86707671 0.97022408 0.99814927\n",
+ " 1.1442741 1.1654167 1.14562428 1.19097204 1.13578646 1.06334554\n",
+ " 1.0539729 1.00987822 0.82205625 0.87411286 0.92038039 1.01775004\n",
+ " 1.14994666 0.85396471 0.88434408 0.86745271 0.69687756 0.82026731\n",
+ " 1.0061832 1.06818965 1.08715668 0.83905553 1.00894969 1.03320296\n",
+ " 1.08996961 0.95516688 1.09027852 0.98481366 1.0692466 0.96337249\n",
+ " 0.77946137 1.15973017 1.12933446 1.18223796 1.06838696 1.16474659\n",
+ " 0.96523368 1.11094058 1.01445395 1.08538164 0.99719707 0.96837924\n",
+ " 1.00128105 1.21262294 0.77064615 1.17904666 1.29573023 0.94712684\n",
+ " 0.82827903 1.05534424 0.9948306 1.013226 1.06905636 0.93703991\n",
+ " 0.86041109 0.83724391 0.77395207 0.94721436 0.84805494 1.10094822\n",
+ " 0.89321219 1.01387545 1.02919957 1.05987136 0.89225483 1.00097486\n",
+ " 0.81888215 0.72551672 0.73611302 1.04437912 0.78607208 1.09475747\n",
+ " 0.97278861 1.07010782 0.99593159 1.0776409 1.16859478 0.96724973\n",
+ " 1.00400609 0.86766442 0.91879166 0.80014409 0.56049269 0.89961012\n",
+ " 0.85374424 0.80829507 0.82122676 1.0412673 0.80163904 0.67119885\n",
+ " 0.74820923 0.97707253 0.94478569 1.23434246 0.82572273 0.83816877\n",
+ " 0.80762537 0.87149442 0.82878897 1.0502462 0.9338512 0.99865436\n",
+ " 0.81923173 0.86731322 1.01531664 0.92746816 0.76621142 0.83689479\n",
+ " 0.8880825 1.03743199 1.07728455 0.8208952 0.98571323 1.00834672\n",
+ " 0.84867814 0.87063664 0.93794591 1.05451811 1.00579318 1.09513149\n",
+ " 1.03803631 0.93268498 1.30015033 1.07625263 0.89674032 0.87912433\n",
+ " 0.95189704 0.93805672 0.85864878 0.89673796 1.03297067 1.01670426\n",
+ " 0.97782869 0.9380319 1.10612258 0.90162563 1.16067494 1.1752781\n",
+ " 1.07374732 0.94531409 0.95593172 1.12653551 1.02273242 1.04571896\n",
+ " 0.6744976 0.88407937 0.98044862 1.04812163 0.93460592 0.97151996\n",
+ " 1.01862611 1.07114171 0.95642514 0.99544281 1.0239257 0.79447079\n",
+ " 1.04269873 1.07041066 1.00010442 0.90836743 0.89402054 0.94082642\n",
+ " 1.02966379 1.11548084 1.12162987 1.10051727 1.00345376 1.19852842\n",
+ " 1.04201465 1.01524578 1.0380211 1.1022665 0.96908443 1.03732815\n",
+ " 0.88183242 0.9042574 0.88305126 0.86813785 0.81322214 0.91198201\n",
+ " 0.82329748 0.58399836 0.67344312 0.9363441 0.9325137 0.73910824\n",
+ " 0.89819286 0.74077595 0.91707532 1.03875401 0.84873276 0.8655165\n",
+ " 0.78768683 0.78970092 0.95718926 1.00534419 1.04581949 0.82230526\n",
+ " 0.81955435 0.40488158 0.71774588 0.79811209 0.84710547 0.39699464\n",
+ " 0.75910707 0.72513821 1.02432389 1.16321629 1.10200362 1.01685563\n",
+ " 1.08144752 1.1498729 0.97053642 1.08768637 1.0287886 0.91475583\n",
+ " 0.89617455 1.02875432 0.88411852 0.89554627 1.02719507 0.97801067\n",
+ " 1.01463273 0.94886902 0.8437611 0.72296983 1.01923928 0.69517018\n",
+ " 0.65640334 0.92942246 1.1196623 0.9433791 1.08332614 0.83324564\n",
+ " 0.80490142 0.86333114 0.90904419 0.84543531 0.82554608 1.01255578\n",
+ " 0.76780056 1.05428984]\n",
+ "Distances (cosine): [0.527668 0.97500192 0.9221239 1.0757162 1.03432262 0.93281117\n",
+ " 0.87208268 0.96459169 1.06430908 0.60045559 0.74402294 0.81812041\n",
+ " 0.80228555 0.81392971 0.87476547 1.12214617 0.82201105 1.15347995\n",
+ " 0.94100947 1.10614256 0.82773723 1.16635552 1.00933521 1.12378526\n",
+ " 1.15157763 0.75613414 0.77501764 0.83588164 0.88674814 0.81474205\n",
+ " 0.78097442 0.89027819 0.98669819 1.12287692 0.78792681 1.13059253\n",
+ " 1.11677816 1.07971428 0.76449673 0.94627512 0.8416973 0.84479077\n",
+ " 0.93112701 1.0459214 0.91973465 0.79451399 0.79407811 0.93065084\n",
+ " 0.99240529 0.92820073 1.07032453 1.05447824 1.0955374 1.02696019\n",
+ " 1.01017163 0.95274773 0.9741497 0.98636249 0.24423288 0.84716004\n",
+ " 0.84357769 1.04289925 0.92560831 0.73445319 0.65546604 0.94399193\n",
+ " 0.99768288 1.05654644 0.92540986 0.91901684 0.85034383 0.80773919\n",
+ " 0.82382623 0.91348755 0.80537884 0.79689129 0.23881253 0.36648207\n",
+ " 0.79319947 0.82813141 0.92132681 1.13170765 0.94996224 0.94244032\n",
+ " 0.92154708 1.05485906 1.10368386 0.91186326 1.12691569 1.01735853\n",
+ " 0.98811497 0.9619129 1.00464106 1.10968805 0.94338915 1.06478764\n",
+ " 0.87883601 0.93354648 0.85627692 0.81072169 0.83720037 0.93201277\n",
+ " 1.13130718 1.13054781 1.17831873 0.83179331 1.2100147 1.00957361\n",
+ " 1.06376933 0.87839425 1.18798383 1.04568144 1.05886351 0.75055535\n",
+ " 1.158863 0.99354791 1.16908212 0.97109405 0.90391154 0.8973484\n",
+ " 0.97874308 0.85746005 0.88539048 0.60060596 0.98715553 1.25157164\n",
+ " 1.09171028 0.99124393 1.01751139 1.09734313 1.01643929 1.02539739\n",
+ " 0.88813004 0.9200778 1.118868 0.75233002 0.85342016 1.14184129\n",
+ " 0.9319245 0.75792237 0.63664436 0.80640746 0.73603447 0.9128514\n",
+ " 0.83584295 0.75768379 0.81797993 0.8040239 1.00950109 1.03650464\n",
+ " 0.96002748 0.77813257 1.07725181 0.86093337 1.10947656 1.01829263\n",
+ " 1.04625223 0.98460623 0.9124533 0.91437285 0.92793642 0.97563272\n",
+ " 1.04206019 1.10869838 0.97531818 0.99371151 1.0160163 0.9444338\n",
+ " 1.02313215 1.14709434 1.11628036 1.00568427 1.1177585 0.86852461\n",
+ " 0.65175003 0.94622119 0.93720079 0.81829976 0.92484313 0.99964736\n",
+ " 1.13054025 1.07414293 1.00163497 0.84717747 0.92404554 1.03493043\n",
+ " 0.94631328 1.06581992 0.94129479 0.92925672 0.88939705 0.68414537\n",
+ " 0.88170044 1.13958318 0.78554811 1.00295135 0.86515663 0.85657185\n",
+ " 0.94535719 0.78247536 0.85371224 1.10204042 0.84953363 1.2619723\n",
+ " 1.16169013 1.23220083 1.2237892 0.70050441 0.86248865 0.80836613\n",
+ " 0.80280108 0.93887806 1.05057002 0.92900718 1.01001261 1.02803824\n",
+ " 1.14336084 0.91057145 1.07429472 1.12221458 1.04314162 1.02664541\n",
+ " 0.9630015 0.99032975 1.01938347 0.95933765 0.91714068 1.11928176\n",
+ " 1.0347135 1.10169671 0.85972813 1.22298729 0.89509502 0.82750944\n",
+ " 0.65438216 0.94339688 0.97353717 0.66366348 0.64940178 0.83075974\n",
+ " 0.84800571 0.93198626 0.98296825 0.98234245 0.8628637 1.04658244\n",
+ " 0.91177141 1.06205416 1.0376464 1.04534721 0.99655727 0.94749908\n",
+ " 1.13749993 0.83182531 0.78464403 0.83595007 0.66520003 0.72671027\n",
+ " 1.06572607 0.96488601 0.75750512 0.9464862 0.94625877 0.94022671\n",
+ " 0.75413182 0.93029524 1.07112501 0.94041683 0.95456033 0.908184\n",
+ " 0.78575092 0.61478227 0.93209352 0.94064366 0.74835687 1.17714918\n",
+ " 0.82171249 0.89515892 0.80345739 1.07301501 0.90164563 1.0029371\n",
+ " 0.93882705 1.01212971 0.85823762 0.8334244 0.88196567 0.68399081\n",
+ " 0.94218102 0.87978592 0.90928344 1.16844924 1.0285414 1.02975426\n",
+ " 0.6373895 1.02835093 0.9132429 0.87748187 0.65240379 1.04997231\n",
+ " 1.00045094 1.11016766 1.00036471 1.09632258 0.98268494 0.67559427\n",
+ " 0.89407903 0.89278319 0.77971862 1.01092694 0.99482725 0.64824678\n",
+ " 0.39108287 0.93661521 1.07704279 0.88147622 1.18985815 1.06168893\n",
+ " 1.09790311 0.99315826 1.13584177 1.08963182 1.08653879 0.85027319\n",
+ " 1.16951034 0.84452895 1.02495224 0.70125196 0.87265605 0.78092524\n",
+ " 1.09277101 1.10886379 1.08379497 1.1762102 1.00671745 1.11979782\n",
+ " 1.00719447 0.89803566 1.00674339 0.95945929 1.0869182 0.70890198\n",
+ " 0.49417604 1.1597765 1.00667041 1.19329792 1.11763833 1.18866021\n",
+ " 1.17973783 1.19555964 1.18437664 1.09509961 0.78053227 0.92793041\n",
+ " 0.80805189 0.90609271 0.80871937 0.81387176 0.87645194 0.94575138\n",
+ " 1.06280063 0.9237728 0.87904094 0.75646923 0.94950148 0.79179003\n",
+ " 0.921351 0.88697256 0.75837177 0.68752281 0.81830838 0.94834889\n",
+ " 1.01726888 1.05439905 0.77235683 0.64060574 0.81308774 0.83586086\n",
+ " 0.78777647 0.96176595 1.24999283 0.80623133 0.78821169 1.07207937\n",
+ " 1.04848094 0.94761505 1.16516808 1.04534866 0.86968022 1.01969437\n",
+ " 0.78820407 0.88957681 0.9078708 0.94538919 0.795858 0.84059317\n",
+ " 1.11138591 0.89762764 0.84728868 0.58213273 1.09148103 0.87881526\n",
+ " 1.02892534 0.9232085 0.96380114 1.00693236 1.07318101 1.01806428\n",
+ " 1.13383549 0.99208417 1.06356413 0.99885986 0.8500064 0.95693527\n",
+ " 0.95830657 0.84698526 1.16554991 0.83844606 1.11203553 0.9253493\n",
+ " 0.95917024 0.90634881 0.938603 0.97452211 0.92406073 0.89765085\n",
+ " 0.98721005 1.09976387 1.015735 0.94977453 0.93465233 0.96670367\n",
+ " 1.01028356 0.98176289 1.14373027 0.86189213 0.85927942 0.89038611\n",
+ " 1.02735687 0.97803611 1.10832584 0.95221745 0.66881122 0.83537206\n",
+ " 0.81301185 0.63197911 0.69169903 0.83918746 0.8332434 0.63523671\n",
+ " 0.54671599 0.79837726 0.88177646 0.49072436 0.71310541 0.69522046\n",
+ " 0.75888488 1.01260704 1.16665019 1.22222713 1.11121528 1.02556801\n",
+ " 0.89283407 1.01992478 0.79394733 1.02136929 0.80818398 0.61451901\n",
+ " 1.04600226 0.74922425 1.01235614 0.89413899 0.94146684 0.92619254\n",
+ " 0.99059648 1.11579634 0.8315289 1.04268825 1.00119595 0.85294985\n",
+ " 0.97375476 1.18536993 1.18990416 1.0931943 0.95641917 1.17323944\n",
+ " 1.11385244 0.95139128 0.9490408 0.86565905 0.96262659 0.62050559\n",
+ " 0.75651191 0.6331299 0.68582775 0.66879802 0.77521981 1.21349821\n",
+ " 1.18202302 1.03272151 0.76752042 0.77613692 0.91926159 1.03783297\n",
+ " 0.93159605 0.96293336 1.03486969 0.75293795 0.92773575 0.94976919\n",
+ " 1.00329425 1.03508725 1.03334527 1.00219884 0.43303534 0.92563522\n",
+ " 1.03517228 0.98087644 1.00557935 0.69906947 0.77454459 1.02494968\n",
+ " 0.79179159 1.004452 1.23370558 1.10317989 1.16689457 1.0301278\n",
+ " 0.65651457 1.07285509 1.12945574 1.02924446 1.02468388 1.07422109\n",
+ " 0.88841869 1.13060169 1.0004553 0.79808561 0.9283944 0.59210061\n",
+ " 0.7630709 0.94854616 0.94689589 1.02256407 0.88237134 1.10657103\n",
+ " 0.86587767 0.84177599 1.13945382 0.93244145 1.09451256 0.9791961\n",
+ " 0.95409531 1.14442089 0.88729158 0.95784802 1.05605532 1.03473474\n",
+ " 0.8554417 0.86979217 0.78481785 0.87417889 1.00266003 1.15431645\n",
+ " 1.00781525 0.90331832 1.09975458 0.97957153 1.02269102 0.93868287\n",
+ " 1.04864025 0.870557 0.63498252 0.8580946 0.76365282 0.83107483\n",
+ " 0.94885134 1.07578018 0.93870211 1.08591191 1.15482354 0.75498755\n",
+ " 0.84016887 1.05939996 1.08279142 1.15916916 0.95953934 0.94818428\n",
+ " 0.90580089 0.79560999 1.05136111 1.04944898 0.97190086 0.92961428\n",
+ " 0.84249806 1.02135882 0.52873163 1.02118532 1.15673069 1.13527785\n",
+ " 1.02815684 1.09657583 1.08750893 0.92996535 1.0669267 1.06913378\n",
+ " 0.9057668 0.98848634 1.04555462 0.65573272 0.98602089 0.9856081\n",
+ " 1.0405076 0.91767168 1.11378856 0.9692744 0.97407286 1.1471927\n",
+ " 1.18898847 1.17142686 0.98051254 1.1187243 0.93696993 0.86447277\n",
+ " 0.86707951 0.86832143 0.76502552 0.90345131 0.96804011 0.98163398\n",
+ " 0.90856549 1.0375325 1.05181864 1.05266386 0.91894458 1.18376633\n",
+ " 0.83578341 1.09915557 1.07309575 0.94714346 1.11322046 0.98448226\n",
+ " 1.0299267 1.13639914 0.78668402 0.9719288 0.83813494 1.05326653\n",
+ " 1.12545139 1.03362844 1.03854837 1.00566168 0.97947914 0.94707057\n",
+ " 0.9647924 1.07120396 1.04820804 1.05782727 0.82671223 1.02775636\n",
+ " 0.96364984 1.04924499 0.92063779 0.94486394 0.6014982 0.83696224\n",
+ " 1.04980374 0.97355553 0.88644075 1.07804659 0.86024033 1.09564583\n",
+ " 0.91130036 0.85173334 0.99391237 1.05773131 0.88089678 0.7466161\n",
+ " 0.72365131 0.9649287 0.92093164 0.88575629 0.95488912 0.84003472\n",
+ " 0.95452068 0.61097274 0.99338125 1.03197637 1.0654022 0.84056987\n",
+ " 1.24563624 1.05016085 1.18985677 0.84326256 1.17270301 1.04965592\n",
+ " 1.22823281 1.29789095 1.09868394 1.12825222 0.98368382 0.95229503\n",
+ " 1.04881056 1.22327047 0.96232267 0.92500946 0.88184231 0.970676\n",
+ " 0.85682888 1.05119124 0.83418125 0.8945311 1.00339631 0.87054802\n",
+ " 0.65611905 0.97136538 1.04105355 1.01066599 0.85180336 1.14012398\n",
+ " 0.93394725 0.8810388 0.97609159 1.008992 0.99329879 0.99068841\n",
+ " 0.83648675 0.76043206 0.70559328 0.99031512 1.02453378 1.09644361\n",
+ " 1.00133627 0.86336839]\n",
+ "Distances (cosine): [0.81137089 0.9170987 0.97821887 0.96367252 1.01680849 1.0233152\n",
+ " 0.98524957 0.78464137 0.97328636 0.31307621 0.4301419 0.3885285\n",
+ " 0.35235122 0.74573222 0.95892806 1.11088256 0.97015814 1.14888065\n",
+ " 1.09967901 1.15842434 0.98561697 1.19448504 1.12446674 1.17669561\n",
+ " 1.07828485 0.8075318 0.84279061 0.92910738 0.9536258 0.81608865\n",
+ " 0.97015466 1.14021993 0.87512973 1.1771193 0.92116842 0.99452579\n",
+ " 0.94608461 1.11860235 0.70617574 1.03177076 0.93384651 1.00950711\n",
+ " 1.09819878 0.89724497 1.17351282 0.89438755 0.81131608 1.01680947\n",
+ " 1.08703477 0.92250026 1.11610599 1.05429039 1.12711796 0.958104\n",
+ " 1.22454533 1.05641883 0.88388853 1.01706471 0.69942414 0.69078233\n",
+ " 0.766334 0.85529651 1.14952619 0.90889071 0.78993064 0.93911353\n",
+ " 1.15272658 1.14915472 0.99661799 0.99226073 1.15697753 1.05260582\n",
+ " 1.04550221 1.05255793 0.89125088 0.93132845 0.70071068 0.6672572\n",
+ " 1.05131823 0.98937422 0.98100351 1.14516423 0.75152254 1.03753454\n",
+ " 1.12296409 0.76521322 0.84347732 0.8435878 1.05396609 1.03973618\n",
+ " 0.94364139 1.07026718 0.93318802 1.03210465 1.01586578 1.07216464\n",
+ " 1.05936344 0.95223192 0.80897994 1.06289684 1.04123568 1.18535601\n",
+ " 1.30983049 1.22412822 0.96828463 0.97618403 1.26069176 0.94598623\n",
+ " 1.08463874 0.99208842 1.22866599 1.2073261 0.97318618 0.84518007\n",
+ " 1.26764429 1.01247358 1.2778888 1.08479674 0.92766857 0.59299893\n",
+ " 0.73587757 0.99015511 1.08624506 0.91767606 0.86983326 1.21823807\n",
+ " 1.14390843 1.2548376 1.06724908 1.16518135 1.10555795 1.12903852\n",
+ " 0.99850703 1.13034879 1.15573706 0.80721121 0.82479113 1.25880454\n",
+ " 1.02887877 0.78907209 0.7681334 0.59341205 1.12356989 0.98048461\n",
+ " 0.98819685 0.90222507 0.98454449 0.92425565 0.98088383 0.80971405\n",
+ " 0.91298964 0.45273423 1.06406822 0.98164068 1.06112433 0.68224257\n",
+ " 0.92801991 0.9193082 0.85477962 0.87631294 0.8250427 0.63293993\n",
+ " 0.91280961 1.17545748 0.82712539 0.84771877 0.87386614 0.67047617\n",
+ " 1.02999884 1.05921665 0.98296949 0.94028625 1.00511757 0.89462585\n",
+ " 0.55373795 0.94837431 0.8710255 0.95115628 0.7337239 1.08780094\n",
+ " 1.14359051 1.23806609 1.00190729 0.97385188 1.0730556 0.89756427\n",
+ " 0.85096807 1.07086088 1.12836973 1.12367283 1.05144027 0.76652131\n",
+ " 0.87208718 1.10760973 0.78495624 0.96730334 1.00174789 0.88970335\n",
+ " 0.94778956 0.91588308 0.96699587 1.20452235 1.00222498 1.23773243\n",
+ " 1.15803013 1.25883086 1.14352479 0.59390593 0.96877777 0.72510628\n",
+ " 0.87576574 1.0556084 1.09724327 0.96571236 0.84864651 1.10162055\n",
+ " 1.04762332 1.0657229 1.16637487 1.17924745 1.10094606 0.9480854\n",
+ " 0.89088242 0.79736439 0.97436745 0.90714617 0.90539542 1.13843972\n",
+ " 1.22573705 1.22580095 1.00432982 1.367025 1.00567401 0.92735449\n",
+ " 0.87200522 0.75703985 1.14809137 0.50748863 0.46243719 0.86292505\n",
+ " 0.96011282 1.11114418 0.9607496 1.14355595 0.97312932 0.87262018\n",
+ " 1.02922907 1.16680197 0.83590489 1.05991792 0.84156576 0.90508994\n",
+ " 0.90172254 0.83283904 0.7903782 0.87156489 0.71682877 1.02568104\n",
+ " 0.97970458 0.96679515 0.8438116 1.09870007 1.18712692 0.98466167\n",
+ " 0.91866497 0.92213288 1.10935543 0.95943756 0.9472328 1.09162671\n",
+ " 0.96565299 0.52785266 0.90747449 0.94963257 0.70629235 1.07803232\n",
+ " 0.93449084 0.97481931 0.99406377 1.12720122 0.96702658 0.91967305\n",
+ " 1.01608316 0.98011863 0.99297031 0.85978246 0.70499871 0.81447142\n",
+ " 0.93154899 0.87545117 0.92525732 1.2475697 1.17217653 0.94111537\n",
+ " 0.8438113 0.98113367 0.93604129 0.86632207 0.94976961 1.078218\n",
+ " 1.14300469 0.91423329 1.1662529 0.90558297 0.76645815 0.41901488\n",
+ " 0.98688987 1.01785685 0.89330588 1.10083629 0.9532572 0.8728929\n",
+ " 0.76509692 0.97984521 1.14032072 1.02821758 1.19404209 1.02478253\n",
+ " 1.06139424 0.90506838 1.08627586 0.89836459 1.11576919 1.03913492\n",
+ " 1.11682241 1.03128174 0.93613082 0.93603049 0.90845903 0.92011513\n",
+ " 1.18298048 1.12528382 1.04227767 1.19749876 1.02386075 1.09179405\n",
+ " 1.13602114 1.06378584 1.1017933 1.19091374 1.14413541 0.97406803\n",
+ " 0.81114148 1.12247222 1.04945662 1.1899848 0.98676867 1.13043231\n",
+ " 1.16707136 1.18057469 1.18221 1.09439547 0.75036956 1.02312851\n",
+ " 1.04759372 1.07973118 0.90292492 0.7986211 1.0118339 0.78843886\n",
+ " 0.96219322 0.99537658 0.8108526 0.66506076 0.96843235 0.97415036\n",
+ " 0.9077366 1.02202327 0.58395516 0.90177236 0.91886263 0.8650244\n",
+ " 0.9242622 1.0092439 0.92499671 0.77926408 0.9475931 1.00445874\n",
+ " 0.83722301 1.1022762 1.13819027 0.8422212 1.0331059 1.16710131\n",
+ " 1.09731951 0.99830867 1.20579087 1.1593878 1.08851568 1.06758165\n",
+ " 0.89167155 0.96995244 0.9695067 0.94900704 1.04755863 0.98899173\n",
+ " 1.02910527 1.06362128 0.90493305 0.88576901 1.13919447 1.0507723\n",
+ " 1.14054763 1.05154891 1.04740424 1.05657206 1.1620662 1.09386701\n",
+ " 1.24421394 0.93701143 1.05668102 1.18827134 0.95534258 1.03551698\n",
+ " 0.88230865 0.79902372 0.99159316 0.94505196 1.08449095 0.9730916\n",
+ " 0.94293133 0.96699167 1.0478987 0.72533304 1.04079666 0.77976204\n",
+ " 1.16971542 1.1134519 0.98967753 0.88206468 1.07134635 0.88856444\n",
+ " 1.00103042 0.92871259 0.95662126 1.039322 1.07533408 1.0378673\n",
+ " 1.0999767 1.01495376 1.17860466 1.11217907 0.67786548 0.74402836\n",
+ " 0.81581897 0.79538509 0.8086174 1.09806299 0.77100433 0.88775902\n",
+ " 0.72884603 0.74254449 1.00665157 0.94368947 0.83806514 0.60415284\n",
+ " 0.79177901 1.05391747 1.19810321 1.22746114 1.29106552 1.1020938\n",
+ " 1.06003935 1.1002111 0.866087 0.94733505 0.86598124 0.81411462\n",
+ " 1.11435716 0.85952399 1.04716797 0.92977197 1.03221085 0.90157278\n",
+ " 0.76294876 1.02234578 0.87841979 1.12020041 1.03221054 0.85073256\n",
+ " 1.1318024 1.23102672 1.15866667 1.15072445 0.91379596 1.1417901\n",
+ " 0.86091625 1.04275792 0.97556191 1.01690508 1.02880049 0.99473293\n",
+ " 0.85771273 0.87099647 0.83386394 0.86191246 0.92741925 1.17084685\n",
+ " 1.10837559 1.0321788 1.0701135 1.03078482 0.90085476 1.14133057\n",
+ " 1.027101 1.03900872 1.00065386 0.85937115 0.82137945 0.79299646\n",
+ " 1.11047633 0.92556528 0.95396973 1.23835706 0.86979549 0.97943875\n",
+ " 1.04850322 1.00850363 0.94734684 0.34289268 0.87512954 0.96793836\n",
+ " 0.7705492 0.91512028 1.04515056 1.00646299 1.07097449 1.15248007\n",
+ " 0.84723298 1.02850711 1.06950033 1.02997201 1.03102302 1.13351776\n",
+ " 0.99897171 1.13772859 1.07789824 0.73225912 0.90344498 1.01061712\n",
+ " 0.85345946 0.79546187 0.95608252 1.04677043 0.71610479 1.2081863\n",
+ " 0.99812892 0.93952175 1.10018558 1.11115832 1.31370347 1.00396047\n",
+ " 1.21909612 1.11324722 1.00937434 1.16608213 1.09665772 1.18468329\n",
+ " 0.84015125 0.91514931 0.97468804 1.08261984 1.06229271 1.04300733\n",
+ " 0.9385122 1.08243907 1.20205725 1.12046602 1.11745348 1.09658535\n",
+ " 1.08956827 0.87015073 0.87050505 0.87466987 0.92403863 0.98160023\n",
+ " 1.06300596 1.19887513 0.90771102 0.92246434 1.2778488 0.86490677\n",
+ " 0.90149785 1.13981752 1.1121463 0.96765 1.09466127 1.06313166\n",
+ " 1.09483317 0.93613364 0.98285856 1.23542767 1.09857639 1.09909931\n",
+ " 1.05060359 1.12206432 0.88360616 1.15057634 1.14197305 1.15446141\n",
+ " 0.8360253 0.9697701 1.05375926 0.93421966 1.22178733 1.07755654\n",
+ " 1.05822308 1.10926689 1.04530496 0.9137499 1.16391674 0.89216742\n",
+ " 1.0146274 0.88511123 1.15128298 0.99530629 1.0302839 1.11059496\n",
+ " 0.99055413 0.96763708 1.0066738 1.00649344 1.10473032 0.91147842\n",
+ " 0.98408652 1.04662143 0.74465001 1.13760329 1.11529095 1.10140864\n",
+ " 1.132316 1.03402027 0.97840534 0.90199806 0.92348265 1.22459057\n",
+ " 0.89823441 0.9894786 1.0058044 1.12035529 1.22470351 0.6962837\n",
+ " 0.76944141 1.00305223 0.73447841 1.04628484 0.73915097 1.16281373\n",
+ " 1.06019991 0.92153789 1.11643337 0.90633336 0.81021858 0.92688479\n",
+ " 0.93046189 1.04373554 1.13702732 0.9522439 0.8937294 1.0981802\n",
+ " 0.92635077 0.90392208 0.9739818 1.01105997 0.91538927 0.7883507\n",
+ " 1.16032403 1.13756608 1.06429554 1.17249203 0.93764848 1.05250303\n",
+ " 1.12938989 0.91609784 1.09994336 1.19785524 1.06165344 0.96646597\n",
+ " 1.03735781 0.8557316 1.11429889 0.94725626 1.05901707 1.01443021\n",
+ " 1.02273789 0.88612624 1.02746649 0.94720004 1.05625989 0.9880061\n",
+ " 1.20328657 1.141993 1.19011106 0.98389462 1.20155924 1.17739047\n",
+ " 1.26674988 1.18459612 1.15598195 1.24770696 0.7519667 1.19287855\n",
+ " 1.13260834 1.16943032 1.10154898 1.02113889 0.88087312 1.02944889\n",
+ " 0.91487456 0.99241434 0.85397911 1.09268407 0.83381482 0.73087042\n",
+ " 0.2993128 0.94295102 0.93553927 1.04422846 0.95610232 1.16568228\n",
+ " 0.91802194 0.98736234 1.04197958 0.96022918 1.04592607 1.12926241\n",
+ " 0.79652617 0.68971518 0.60874421 0.95329843 0.90598396 0.79384753\n",
+ " 1.06659486 0.95061722]\n",
+ "Distances (cosine): [0.83982219 0.84785102 0.90776011 1.02314805 0.97351944 0.96741566\n",
+ " 1.0276395 0.89625561 0.97140536 0.9680626 0.87461767 0.91070084\n",
+ " 0.85571179 0.87726713 0.97031846 0.95075474 0.82638339 0.81093675\n",
+ " 0.91729064 1.15065701 0.78785947 0.7760126 0.89931346 1.05650537\n",
+ " 0.829198 0.92791663 0.93113358 0.88057683 0.7884134 0.86230079\n",
+ " 0.7488387 0.96835127 1.03240584 0.89165565 0.85594543 0.9663714\n",
+ " 0.92936082 0.87384386 0.95153604 1.14959772 0.92169362 0.81786561\n",
+ " 0.94864521 0.90101922 0.97625422 0.87099712 0.78814978 0.87961081\n",
+ " 0.83725266 0.83648681 0.98248919 0.87454905 0.94699392 0.95577508\n",
+ " 1.03973307 1.06600922 0.83958304 0.88198893 0.9518607 0.90186598\n",
+ " 0.91285739 0.85497226 0.93937096 0.77496703 0.75733635 0.8661877\n",
+ " 0.92229698 0.82407183 1.21382444 0.78398638 0.88237627 0.97814677\n",
+ " 0.81653622 0.9484589 0.89482532 0.8843362 0.87922186 0.9002086\n",
+ " 0.78705286 0.93409233 0.99388718 1.0680285 0.91095664 1.03025111\n",
+ " 0.78580813 0.8436553 0.93305259 1.03887799 0.97903286 0.9430605\n",
+ " 0.91649236 0.88852978 0.91824002 0.75891342 0.8015122 0.87619064\n",
+ " 0.73523862 0.86456445 0.7624684 0.94000566 0.94033632 1.06313686\n",
+ " 0.96208168 0.92663336 1.14299293 1.02748696 0.91412587 0.8995744\n",
+ " 0.92206998 0.93240692 1.21035331 0.87219708 0.88062903 0.92679163\n",
+ " 1.11864855 0.96968483 1.00982425 1.01913841 0.96835847 0.98072824\n",
+ " 0.94784449 0.94313874 0.79626409 0.95921722 0.93729631 0.92383629\n",
+ " 0.87459493 0.92591119 0.82077909 0.81849373 0.86869846 0.84089939\n",
+ " 0.96522306 0.84327203 0.94066242 0.64181256 0.73187248 0.88266535\n",
+ " 1.13182673 0.9305205 0.82597666 0.7585566 1.07579213 0.85695367\n",
+ " 0.94959537 0.80932276 1.03325591 0.82823273 1.02415805 0.93829861\n",
+ " 1.06584009 0.92467575 0.98697582 0.92369635 0.99423894 1.06658031\n",
+ " 1.12431665 0.8791592 0.93126662 0.95649215 0.98616254 1.15970283\n",
+ " 0.95110275 0.95486327 1.02529751 1.02537272 0.99123403 0.96104428\n",
+ " 1.08004035 0.96372254 1.02796605 1.16202925 1.10931924 1.03950115\n",
+ " 0.67444796 1.04117878 0.9253668 1.12238924 0.91287551 1.09943736\n",
+ " 0.9397423 1.07233221 1.13432055 0.8971677 0.98697484 0.94866979\n",
+ " 1.19841829 1.1163729 1.1752368 1.19714159 1.09021128 0.99427685\n",
+ " 0.87303316 1.06247523 0.72268648 0.87575965 0.81329992 0.84926394\n",
+ " 0.823151 0.79669145 0.86026759 1.06873049 0.9908547 0.896792\n",
+ " 1.09652113 1.15642432 1.04563371 1.17190212 1.0430178 1.04038442\n",
+ " 0.97872475 0.42179418 1.0763532 0.85951825 0.88685591 0.9304495\n",
+ " 1.07086197 1.17538819 1.00500645 1.10846284 1.07319705 0.98385834\n",
+ " 1.10666766 0.92636653 0.75920756 0.74586954 0.92987697 1.13174534\n",
+ " 1.02848161 1.0589783 1.0494612 1.00315614 1.0899506 0.68315579\n",
+ " 0.71132201 0.91063833 0.97714155 0.99743367 1.00814262 0.91182988\n",
+ " 0.90650283 0.92784015 0.8943184 0.96523166 0.99842732 0.93602733\n",
+ " 1.05809203 0.84680145 0.8361474 1.05189417 0.90319348 0.87576888\n",
+ " 1.08584903 0.97451356 0.9542597 0.96047801 1.07603222 0.89307278\n",
+ " 0.98118101 0.98239469 0.72978935 1.12874184 0.85426922 0.91951615\n",
+ " 0.94991815 0.86971732 1.05665631 1.0161311 0.9456956 0.96549672\n",
+ " 1.06706535 0.93292058 1.06175235 1.08963296 0.77549469 1.17910843\n",
+ " 0.89518773 0.7727377 0.84058263 1.08645045 1.04201252 1.11227056\n",
+ " 1.07271693 0.97848387 1.06948628 0.98352737 1.09474525 0.85722132\n",
+ " 0.82955252 0.89932279 0.92746862 0.94601251 1.05550788 0.81629678\n",
+ " 0.95141732 1.0044965 0.94777282 0.97452741 1.00931873 1.04310104\n",
+ " 1.1027806 0.8064357 0.96322069 1.01615433 0.94639179 0.85594334\n",
+ " 0.81685723 0.94120645 0.92537809 0.9273052 1.04502357 0.94628854\n",
+ " 1.02062871 0.96469043 0.96802887 0.82090063 0.95881449 1.03719752\n",
+ " 0.92051657 0.96351419 1.00267028 0.97170391 0.84480476 0.97389411\n",
+ " 0.86063151 1.07647178 1.04622677 0.8251521 1.08886852 1.11313586\n",
+ " 1.21629142 1.07772684 0.95607897 1.05896914 0.86033253 1.02620132\n",
+ " 0.93869738 0.8912867 1.07543381 0.86156398 0.99267743 1.04211705\n",
+ " 1.05557462 1.03482199 1.12549084 1.06585417 1.06006664 1.07173559\n",
+ " 1.05398277 1.0921622 1.09462826 1.09734843 0.99044405 0.88777541\n",
+ " 0.99965784 0.94439868 0.82043424 0.8067286 0.91389158 0.89179502\n",
+ " 0.92156254 0.96583612 0.9076891 0.92267145 0.98248093 0.9787519\n",
+ " 0.98730859 0.88439251 0.80828591 0.92289365 0.96251868 1.09972352\n",
+ " 0.92498168 0.94249131 0.75760173 0.9107805 0.97897482 0.97845514\n",
+ " 0.94113377 0.90013829 1.16987998 0.99487082 0.90670791 1.02335443\n",
+ " 0.98233296 0.99151606 1.0643847 1.03969033 1.08502443 0.79830973\n",
+ " 0.80073052 0.95676193 0.98116028 0.83190328 0.91343548 0.96488295\n",
+ " 1.06650923 0.90421635 0.95303157 1.09465905 1.07842264 1.06064988\n",
+ " 1.03381408 0.95927114 0.95209276 0.95476949 1.12373465 0.9764688\n",
+ " 0.89788269 1.12562384 0.93634793 0.95112767 0.90993235 0.93123172\n",
+ " 0.90839171 0.79005081 0.95680102 1.05974716 0.89222202 0.96433866\n",
+ " 0.86013926 0.96954403 1.01800707 0.8546139 0.99400178 0.90046957\n",
+ " 0.95515291 0.89555865 0.96463509 0.97542993 0.76062643 1.01976874\n",
+ " 0.91371625 1.01296803 1.16761464 0.86405681 0.85940288 0.84901624\n",
+ " 0.961322 0.8853649 1.03240755 1.09282756 1.0582686 0.97615913\n",
+ " 1.07092681 1.03622101 1.13242353 0.94513265 1.10420882 1.09398704\n",
+ " 0.90392405 1.00287343 0.87426004 0.95353373 1.00423972 1.15282195\n",
+ " 0.92642633 1.02726548 1.09961707 1.10196001 0.95526778 0.99850358\n",
+ " 0.9299175 0.71153106 0.92492178 0.91911734 0.90496375 1.0560359\n",
+ " 0.83018137 1.01470491 0.93748026 0.93381658 1.0396781 0.95117681\n",
+ " 1.03891648 0.95291193 0.92972046 1.10927736 0.94541117 0.77244077\n",
+ " 0.8986492 1.17941398 1.10973725 1.01314031 0.98108618 1.05164962\n",
+ " 1.02824497 1.03257122 1.03255457 0.87539449 1.07916504 0.9372701\n",
+ " 1.01987329 1.05935737 1.1227104 0.93691796 1.09385777 0.93954998\n",
+ " 0.98463532 1.09201688 1.05532249 0.86348964 0.28497986 1.03508338\n",
+ " 0.9144048 1.03569389 0.88623459 0.8517852 0.97284546 0.88343987\n",
+ " 0.93246018 1.03807684 0.95927568 1.08822179 1.14241887 0.9794039\n",
+ " 1.0912476 1.17105619 1.00733372 0.9499505 0.77723851 0.9553672\n",
+ " 0.63970347 1.06922566 0.89930137 0.83539866 0.86579453 1.03265095\n",
+ " 0.97444264 1.10736886 0.99274923 1.05719293 1.10509205 1.18244716\n",
+ " 1.18197589 0.95061164 0.98734152 1.03674675 1.03118816 0.96821149\n",
+ " 0.8415231 0.91984721 0.90123214 1.10649042 0.87484199 1.11096457\n",
+ " 0.80542953 0.75734181 1.00161031 0.93393053 0.96641354 0.86810317\n",
+ " 1.03402745 0.91914951 0.9168397 0.94991551 0.88957818 0.88231241\n",
+ " 0.92088129 0.96883299 0.91619874 1.06988146 0.95248955 0.90171962\n",
+ " 0.86533417 1.08349 1.04284209 0.98929974 0.97544289 1.07066403\n",
+ " 1.00291403 0.95816408 0.92349372 0.74046173 0.97541474 0.90516995\n",
+ " 1.00635323 1.00632329 0.90460869 0.94101986 1.06189401 0.93060286\n",
+ " 0.86548054 0.93544486 0.93254304 1.0129833 0.95061175 0.90683773\n",
+ " 0.88853853 0.98255584 0.91474305 1.10819666 0.87282044 0.98319624\n",
+ " 0.90201955 0.9980095 0.95492366 1.01014606 1.04339851 1.04438811\n",
+ " 0.92685162 0.95695888 0.93808089 0.94277124 0.95126032 0.97861442\n",
+ " 0.95287238 1.06480476 1.06661607 0.91943395 0.9390805 1.06516211\n",
+ " 1.0162961 0.62879333 0.9048945 1.02400905 1.07808582 1.06754816\n",
+ " 1.13196697 1.09269156 0.84236832 1.08009309 0.99870547 0.87097401\n",
+ " 1.0378727 0.98838071 1.09975598 1.02055669 1.09022213 1.15971419\n",
+ " 1.15588799 0.85557819 0.91808862 0.90078535 1.03050155 1.12352585\n",
+ " 0.84238093 0.85176117 0.90034569 0.98887254 1.04366276 0.98884479\n",
+ " 1.01175392 0.99482038 0.90209062 1.11891912 0.98325245 1.12797318\n",
+ " 0.90787391 0.91128414 1.05562978 1.12476002 1.12515884 1.18611659\n",
+ " 1.1241141 1.13077948 1.24682286 1.13763513 0.98502705 1.18389314\n",
+ " 1.1275269 1.09887389 0.99844863 1.01032319 0.96989211 1.01902095\n",
+ " 1.03063124 0.85184889 1.03663465 1.12045824 1.07241014 0.84827262\n",
+ " 1.06891276 0.94178326 0.97640155 1.25424995 1.10952564 1.07707641\n",
+ " 1.04159983 0.91097926 0.94709074 1.08578436 1.12291252 1.09587783\n",
+ " 1.06031976 1.02719976 1.11599243 0.7766615 0.93267035 0.87681768\n",
+ " 0.88557382 1.00830325 1.15578183 0.83142329 0.81066989 0.96166708\n",
+ " 1.05495681 0.94570084 1.06865094 1.05429961 0.98655212 0.97120727\n",
+ " 1.06576466 1.06581906 0.90306465 1.13854317 0.98534803 0.78872527\n",
+ " 0.85589071 0.9724319 0.92203949 0.88964418 0.85906861 1.06078203\n",
+ " 0.82066271 1.05660442 0.98932137 0.67748615 1.04096299 0.8491599\n",
+ " 0.88636178 1.03712949 1.17872256 0.92172776 1.14845979 0.92383507\n",
+ " 0.89531017 0.77027701 0.81536138 0.89685088 0.83207501 0.93536151\n",
+ " 0.87800656 0.92621417]\n",
+ "Distances (cosine): [0.88854661 0.93535573 0.95566299 1.0992454 1.04232769 0.93908778\n",
+ " 0.97529726 0.93724327 0.98466445 0.98436148 0.92776596 0.95867325\n",
+ " 0.9015388 0.75926466 1.04239868 0.93745503 0.80905665 0.74836049\n",
+ " 0.98046743 1.12016825 0.7796834 0.73896959 0.95374754 1.04523808\n",
+ " 0.94908038 0.96700333 0.93094414 0.85313938 0.76004633 0.88508848\n",
+ " 0.81254694 0.94427531 1.0772097 0.93915077 0.98289886 0.9884915\n",
+ " 0.9798818 0.92793742 0.97571055 1.14765888 0.89771089 0.87803173\n",
+ " 0.93651781 0.94548152 1.01386854 0.80496964 0.84165258 0.78692748\n",
+ " 0.86488069 0.92018787 1.00571559 0.87486086 0.97893336 1.03088592\n",
+ " 1.00402299 1.08792789 0.79494837 0.84043649 0.96861954 0.96710154\n",
+ " 1.05617381 0.92933409 0.94203174 0.8996385 0.77335779 0.90791604\n",
+ " 0.93024941 0.88205115 1.16782275 0.85893835 0.87152914 1.00340627\n",
+ " 0.8198323 0.96938953 0.90388925 0.91710324 0.91447372 0.91702234\n",
+ " 0.77045147 0.96357584 1.03010885 1.01081402 0.98426977 1.00452193\n",
+ " 0.77699693 0.89529438 0.91415218 0.99691809 0.92864517 0.94082482\n",
+ " 0.92814063 0.93653862 0.95286705 0.79032246 0.85119332 0.83971469\n",
+ " 0.72490728 0.89813333 0.81672217 0.91562064 0.91878681 0.9847874\n",
+ " 0.88131843 0.8780673 1.15309324 1.06804647 0.94671827 1.00616375\n",
+ " 0.90943882 0.98291827 1.08008955 0.78618742 0.81880992 0.97145214\n",
+ " 1.13936336 0.92513142 1.03552611 0.95810136 0.84620437 1.01062855\n",
+ " 0.96586741 0.85223209 0.81876306 0.9231071 0.93232581 0.91601185\n",
+ " 0.85607175 0.86135248 0.93006385 0.7925004 0.8709093 0.79113817\n",
+ " 0.99869097 0.83779687 1.04647275 0.66552227 0.75222458 0.85527416\n",
+ " 1.10880217 0.94491016 0.8445618 0.88724102 1.01351084 0.90528557\n",
+ " 1.04940253 0.94038024 1.05252444 0.88527113 1.01624538 0.92180601\n",
+ " 1.0779799 0.9318702 0.93154812 0.87913256 0.92613203 1.06089638\n",
+ " 1.0496537 0.90248837 1.00136466 0.93624822 1.05784133 1.10248993\n",
+ " 1.02003456 0.94919126 1.04743428 1.06552364 1.00991393 1.00871286\n",
+ " 1.05407849 1.01087119 1.00124155 1.11737421 1.07620199 0.99554157\n",
+ " 0.67534583 1.06370219 0.98719407 1.05000781 0.98793233 0.93992445\n",
+ " 0.8648117 0.98183254 1.09041767 0.91530163 0.90750206 1.007404\n",
+ " 1.21502798 1.13061648 1.07138072 1.10673015 1.04533886 0.96634494\n",
+ " 0.86678847 1.01949455 0.7548848 0.9316641 0.89067766 0.98237388\n",
+ " 0.84723566 0.76028446 0.91058239 1.01143483 0.88421335 0.82849344\n",
+ " 1.06020214 1.09512101 0.9756496 1.19083574 1.05324559 1.02598073\n",
+ " 0.90213561 0.24359352 1.04870365 0.89487018 0.92296778 0.90318676\n",
+ " 1.1066017 1.17741352 1.06628709 1.16389887 1.17732702 0.92736167\n",
+ " 1.09003179 0.96235566 0.75124781 0.81434102 0.96731148 1.19494504\n",
+ " 0.99049415 0.95595134 0.99139012 0.90069196 1.01763937 0.71698978\n",
+ " 0.74405896 0.96077843 1.00700392 1.10036478 1.0933927 1.043796\n",
+ " 1.01470792 0.97665691 0.97497146 1.04839987 0.99490752 0.99951178\n",
+ " 1.14131467 0.88970852 0.90703467 1.02606924 0.9206474 0.96423554\n",
+ " 1.03287218 1.09474263 1.08178116 1.09447471 1.16832823 0.89540997\n",
+ " 0.9856401 1.02458196 0.60350519 1.14220224 0.95002545 0.98347149\n",
+ " 1.04132877 0.90594256 1.0215918 1.07208929 0.98809117 0.87789419\n",
+ " 0.97113009 1.01711889 1.08608649 1.04056247 0.82180229 1.09756644\n",
+ " 0.89244455 0.75109727 0.8188796 0.91826007 0.98833501 1.18054305\n",
+ " 1.00671416 0.92931991 1.02468582 0.99020217 1.13428362 0.89072088\n",
+ " 0.79158203 0.91734211 0.95559386 0.8490471 1.05446533 0.91023561\n",
+ " 0.91444439 1.00976131 0.87304203 1.02936048 0.98857838 1.03792764\n",
+ " 1.03887263 0.87399044 0.96547662 1.03734154 0.93361524 0.96989087\n",
+ " 0.80734265 0.90700398 0.93672325 0.95748053 1.09720091 0.90462571\n",
+ " 0.99092383 0.97185922 0.89905031 0.88163034 1.01038549 1.03074523\n",
+ " 0.79684087 1.01869718 1.02426256 1.04022714 0.88473702 0.96200774\n",
+ " 0.89911986 0.9937136 0.99364067 0.85127677 1.09413489 1.05929079\n",
+ " 1.11527708 1.13685375 1.01247729 1.15272817 0.97721515 1.11067388\n",
+ " 1.02703824 0.99002758 1.17909438 0.80831469 0.96752337 1.07454473\n",
+ " 1.02881676 1.01894465 1.15747444 1.06327284 1.10937509 1.0662275\n",
+ " 1.01290897 1.01346796 1.07741285 0.99996133 0.97171158 0.89470364\n",
+ " 0.96028581 0.88103325 0.8918453 0.74922088 0.93877911 0.9392977\n",
+ " 0.99862538 0.96946446 0.94156315 0.98249781 0.89994199 0.8818737\n",
+ " 1.00129569 0.7549443 0.75875039 0.82143113 0.82550065 1.12018777\n",
+ " 0.95560585 0.97731029 0.79310252 0.83403924 1.01919212 0.81186759\n",
+ " 0.85130953 0.90162415 1.24860577 1.01141824 0.82370449 0.95940567\n",
+ " 0.91333976 0.94908897 1.06640407 1.07150973 1.07467352 0.84610021\n",
+ " 0.85269014 0.97815795 0.98096578 0.85801584 0.84736488 0.94856694\n",
+ " 0.972406 0.9722913 1.00493065 1.0401883 0.95487611 0.9684137\n",
+ " 0.95025026 0.85532612 0.83462718 0.82656569 0.91015478 0.92844003\n",
+ " 0.83341569 1.07552053 0.94396092 0.93901193 0.94422024 0.94570562\n",
+ " 0.89300932 0.84843467 0.97828099 1.05355946 0.8682839 0.89917458\n",
+ " 0.86559981 0.86564598 0.93942669 0.95932969 1.03768357 0.92347587\n",
+ " 0.92277434 0.91165416 0.87949991 0.97673509 0.76170632 1.03767909\n",
+ " 0.94996485 1.03627748 1.13665196 0.87650808 0.88580915 0.88931871\n",
+ " 0.98560939 0.91527734 0.98914867 1.02984831 1.06844397 1.03695579\n",
+ " 1.08312421 1.07590316 1.15577791 0.87841788 1.07197609 1.09322074\n",
+ " 0.94215641 0.99104404 0.89665966 0.99563357 1.03463995 1.11967122\n",
+ " 0.89208439 0.9147318 1.01192715 1.07244082 0.85706129 0.87855845\n",
+ " 0.99499838 0.7456463 1.03799592 1.02368505 0.85342308 1.03088939\n",
+ " 0.81698497 0.93273533 0.82729722 0.83869122 0.81469767 0.78499204\n",
+ " 1.0412911 1.04575936 0.91847563 0.95084511 0.95042579 0.82522765\n",
+ " 0.85316874 1.06090088 1.07002173 0.94845731 1.02648298 1.00491015\n",
+ " 1.0508358 1.0421335 1.07715938 0.90897688 1.10427872 0.91174623\n",
+ " 0.99310208 1.03421372 1.10971465 0.95349792 1.04706729 0.94004187\n",
+ " 1.01363106 1.117531 1.05160972 0.94112959 0.33572468 0.97084519\n",
+ " 0.82163279 1.02819679 0.91100412 0.95597098 1.0529896 1.0432342\n",
+ " 0.90857113 0.98145161 0.93520873 0.99538878 1.08438753 0.99700416\n",
+ " 1.10187518 1.1265943 1.04552854 0.95757395 0.65224465 1.02114559\n",
+ " 0.74535156 0.95059508 0.85655671 0.75028233 0.84530511 0.93947761\n",
+ " 0.94882796 1.05765046 0.91592452 0.99029967 1.05794661 1.13767646\n",
+ " 1.04964906 0.88260439 1.05936722 1.00782229 0.91147532 0.99435287\n",
+ " 0.85978455 0.95126841 0.89534315 0.98904559 0.90862669 1.23398777\n",
+ " 0.78012026 0.7719898 1.0503023 0.96849657 0.90603306 0.90801825\n",
+ " 1.00757591 0.93336687 0.91882677 0.84470749 0.78403129 0.89981942\n",
+ " 0.94222481 0.96316751 0.97496465 1.07233092 0.95668892 0.89443414\n",
+ " 0.96271262 0.97755991 0.96799905 0.87366715 0.84264935 1.01705424\n",
+ " 1.05662739 1.03920871 0.87084573 0.83902794 0.99334965 0.97077595\n",
+ " 1.05495587 1.05705738 0.91891834 1.01370283 0.96762996 0.90008765\n",
+ " 0.92389428 0.98926664 0.98883443 0.99164297 1.00598475 0.9692271\n",
+ " 1.0040501 1.02159256 1.01696469 1.11158463 0.90283847 0.98645527\n",
+ " 0.93691934 1.0649083 1.032662 0.95134084 1.02888426 1.01829235\n",
+ " 1.00510291 0.95274305 0.91926869 1.04332504 0.93988296 0.89598364\n",
+ " 0.9289698 1.07964229 1.08443517 0.82637489 0.92479075 1.06170418\n",
+ " 1.06732813 0.70846525 0.859892 1.08786256 1.06125499 1.09323918\n",
+ " 1.1995612 1.14936838 1.02143421 1.12666261 0.97344578 0.7960604\n",
+ " 1.0210266 0.9129015 1.02397339 1.05973155 0.98411833 1.05859259\n",
+ " 1.02643828 0.95546207 1.01273753 0.97497776 1.03097955 1.03673751\n",
+ " 0.88663524 0.93046881 0.98481718 0.87127478 1.00985973 1.04663223\n",
+ " 1.05707287 1.00545479 1.01466676 1.10097553 0.99597789 1.00832186\n",
+ " 0.94005095 0.95161152 1.0502331 1.05781499 1.09599124 1.10272985\n",
+ " 1.10111051 1.04098684 1.06512838 1.09642362 1.03211982 1.0091015\n",
+ " 1.01408179 1.08261319 1.06687056 1.05388288 0.89824859 1.04448299\n",
+ " 0.99647922 0.75837944 0.93652012 0.97218199 0.98729871 0.85760041\n",
+ " 0.96287443 0.96080893 0.90712317 1.12434378 0.95547604 0.97889561\n",
+ " 0.98253368 0.9310875 0.86301799 1.03788749 1.03593835 0.98589283\n",
+ " 0.99745003 0.9960673 1.08923135 0.84030729 0.96328136 0.8224505\n",
+ " 0.81737212 1.03283699 1.09826853 0.79793883 0.7534206 1.00184691\n",
+ " 1.0184541 0.98870238 0.94042559 0.89773533 0.92146944 0.905453\n",
+ " 1.02989765 0.90711587 0.85863145 1.11191449 1.02635752 0.79206224\n",
+ " 0.90169724 0.94515589 0.95893484 0.92295073 0.84124368 0.99636405\n",
+ " 0.91138248 0.99453601 1.0235495 0.71193628 1.00723615 0.87498045\n",
+ " 0.88340703 0.989559 1.16255462 1.0048785 1.14901274 1.08011484\n",
+ " 0.95009866 0.77365269 0.8453254 0.92191271 0.89653263 0.99906851\n",
+ " 0.85348517 0.83262151]\n",
+ "Distances (cosine): [1.0707766 1.05280123 0.9955073 1.17160808 1.08158984 1.12031085\n",
+ " 0.92118866 1.05903729 1.13958094 0.85620418 0.89963888 0.91450554\n",
+ " 0.87559333 0.56488214 1.12598696 1.05489714 0.85154985 0.87269421\n",
+ " 1.13103236 1.1685046 0.86397561 1.02124688 1.17886701 1.1875973\n",
+ " 1.02783972 0.99565907 1.06847435 0.86179594 0.95589025 0.96976869\n",
+ " 0.90104044 1.08762538 1.35259132 1.12069541 0.93859056 1.17618556\n",
+ " 1.13213007 1.02420363 1.04806411 1.24254915 1.14539744 0.79004292\n",
+ " 0.96451416 1.12711291 1.0771372 0.79692315 0.86517961 0.72162279\n",
+ " 0.76688949 0.79993995 1.06285664 0.78942731 1.11239941 1.22366833\n",
+ " 1.09495333 1.06526128 0.90207844 0.96916852 0.95516145 0.90784846\n",
+ " 1.09070536 0.87251183 0.97037547 0.85331474 0.84954116 0.93874252\n",
+ " 1.1569516 1.01462285 1.29566637 0.79245699 0.75335928 1.0466561\n",
+ " 0.86098592 1.13171083 1.17231135 1.19100685 0.77839495 0.97397046\n",
+ " 0.83531213 1.14957035 0.94193767 1.07939276 0.97823706 1.16874725\n",
+ " 0.92773496 0.99416937 0.89072113 1.02999901 1.25049625 0.91774146\n",
+ " 1.20105933 1.052994 0.88800097 0.81103482 1.05202223 0.9100579\n",
+ " 0.81157463 0.97661432 0.85762017 0.86693153 1.00931338 1.14277468\n",
+ " 1.03151954 0.96114609 1.1692051 1.13029583 1.14015435 0.96348432\n",
+ " 1.05894499 0.99901679 0.98279746 0.80904084 0.94578394 1.04990323\n",
+ " 1.20607512 1.06188527 1.20115547 1.0557038 0.78506381 1.05653173\n",
+ " 0.98065749 0.79715656 0.84820726 1.02779077 0.7928568 1.09311799\n",
+ " 1.21482736 1.16284229 0.87710987 0.95833227 1.09038437 0.95680158\n",
+ " 1.02688945 0.89169632 1.09909908 0.61980567 0.63087263 1.17514766\n",
+ " 1.09503016 0.9240841 0.82125754 0.93721084 1.12379649 0.88617211\n",
+ " 0.94857259 0.83534591 1.14717976 0.87703866 1.03279811 0.84797477\n",
+ " 0.95687695 0.75843061 0.89241044 0.73669267 0.88678902 0.97519241\n",
+ " 0.91490703 0.82225793 0.94467426 0.76523249 1.0287892 0.95648406\n",
+ " 0.99154697 1.10012846 1.07942535 1.04017368 1.00063467 1.01732551\n",
+ " 0.94196829 0.92472687 1.02225475 1.12617411 1.17398001 1.13984747\n",
+ " 0.79310398 1.12481898 0.97835276 1.15064918 0.9573869 0.97662453\n",
+ " 0.92393886 1.08306903 1.20483003 0.83383811 0.93226026 1.159906\n",
+ " 1.19187156 1.19173845 0.97592162 1.12447657 1.0131322 0.9996813\n",
+ " 0.88470972 1.17916519 0.92069851 0.95534954 0.99883255 1.01778056\n",
+ " 0.87506954 0.70270725 0.84249383 1.01799434 0.80857697 0.96059062\n",
+ " 1.17313307 1.22847605 1.07144521 1.18944848 0.87675349 0.94737759\n",
+ " 0.82884484 0.65088536 0.96848544 0.97931229 0.85461497 1.06146485\n",
+ " 1.12841895 1.13208836 1.01549957 1.13389109 1.10949864 0.86936205\n",
+ " 1.16171667 0.99191634 0.69886441 1.01244771 1.06384391 1.16342043\n",
+ " 0.87714648 0.79509355 0.82465337 0.92394204 1.17326398 0.83171855\n",
+ " 0.72586251 0.97521097 1.23270537 0.95950087 0.90448721 1.09748251\n",
+ " 1.11664194 1.11381386 0.85843497 1.14389576 1.005197 0.92424425\n",
+ " 1.15166063 0.96106449 0.94491532 0.95296302 1.0005168 1.10315638\n",
+ " 1.1045582 1.10144251 1.08773441 1.10478829 1.05251734 0.79515185\n",
+ " 1.12874883 0.73995228 0.10969604 1.23824124 1.12607204 0.97018169\n",
+ " 0.84349875 1.01433251 1.08762463 1.07383379 1.11160222 0.84134066\n",
+ " 1.12378758 0.99942415 0.99125837 1.06813928 0.87961573 1.04047644\n",
+ " 0.89610017 0.85896746 0.87468646 0.94542567 1.00066046 1.20462196\n",
+ " 0.8019749 0.70664758 1.03514693 0.9561904 1.04864485 0.7093718\n",
+ " 0.62870155 1.05911608 1.06181346 0.87222819 1.05868497 0.84837236\n",
+ " 0.88730635 1.06601646 0.70433699 1.07299319 1.00986177 1.07600932\n",
+ " 1.17921435 0.91737088 1.09676001 0.94393789 1.01761549 0.93144638\n",
+ " 0.99679325 0.92311187 0.89198162 1.19355581 1.09076016 0.89926787\n",
+ " 0.92093785 1.13603794 1.07319538 0.89816184 1.17260584 0.8685913\n",
+ " 0.72768754 1.07962396 1.00187679 1.16232327 1.02152627 0.94809315\n",
+ " 1.10274643 1.00768191 1.08868491 0.67114737 1.03659213 0.98192373\n",
+ " 1.24505852 1.05650829 1.14276317 1.20663884 0.95341254 1.20778328\n",
+ " 0.98449955 0.78187294 1.06576426 0.99532712 1.12295518 1.15194253\n",
+ " 0.97219432 1.03066045 1.10710576 1.08129495 1.11898624 1.14102453\n",
+ " 1.18983583 1.08883795 1.24885187 1.01155629 0.97168047 0.95805238\n",
+ " 1.1544316 0.89415283 0.95257411 0.47803098 1.04583569 1.09083083\n",
+ " 1.22131604 0.99894557 0.97008813 1.04828543 0.8483809 0.6909563\n",
+ " 0.92722364 0.84583213 0.53728143 0.80789167 0.87926639 1.09680375\n",
+ " 1.02926318 1.05994729 0.92270033 0.9860167 1.08585048 0.79263545\n",
+ " 0.7101507 0.97877949 1.2687709 1.04134898 0.83306886 1.1589305\n",
+ " 1.09108209 1.05862219 1.15065944 0.9895469 1.13335957 0.87489193\n",
+ " 0.99816352 1.09310817 0.87937897 0.9975643 0.85158909 0.91528298\n",
+ " 1.14823249 0.9730438 0.95262801 1.04994024 0.90527764 0.90592552\n",
+ " 0.92390878 0.71703474 0.71430088 0.84390663 0.85046283 1.06762321\n",
+ " 0.85875198 1.20749691 1.08311771 1.06071818 1.00787347 1.13458033\n",
+ " 0.90840748 0.86493704 0.88905027 1.20236518 0.82467593 0.79638833\n",
+ " 0.94015004 0.89574558 0.84816608 1.12020455 1.18948709 0.96355574\n",
+ " 0.89144563 1.00294445 0.93029411 1.10587289 0.82399037 1.0332549\n",
+ " 1.07269309 1.06434346 1.16664526 0.9374199 0.96101422 0.98371068\n",
+ " 0.99968115 1.02873883 1.17144571 1.23749812 1.0685983 1.00944943\n",
+ " 1.13145731 1.13154877 1.15308064 1.08644582 1.06773066 1.18014143\n",
+ " 1.00819988 1.09432164 1.10547295 1.07079659 1.1055887 1.05713438\n",
+ " 1.06141331 0.80706438 1.1511425 1.27129729 1.01105695 1.00313801\n",
+ " 1.2067822 1.05971195 0.96269274 0.96931016 0.86215488 1.06673718\n",
+ " 0.99874871 0.87535304 0.72363765 0.47000197 0.4297472 0.38611815\n",
+ " 1.084379 0.99830932 0.9364342 0.99420145 1.07922776 0.63959544\n",
+ " 0.92583125 1.2054432 0.9701016 0.94517586 1.09821189 1.12033783\n",
+ " 1.0815286 1.04824306 1.10834837 0.99444181 1.12368013 1.05690306\n",
+ " 1.07400025 1.07397141 1.12357004 1.05428371 1.14635013 1.15813833\n",
+ " 1.16037201 1.08660616 0.99585101 0.99682737 0.67192031 1.02574674\n",
+ " 0.97536751 1.07420962 1.06855534 0.95389835 1.04703887 1.09578469\n",
+ " 1.06120072 1.07446243 1.01453858 1.14733232 1.07169658 1.17781383\n",
+ " 1.13943047 1.20070502 1.15549696 0.72363878 0.14631016 1.08108698\n",
+ " 0.5644057 0.86192329 1.00921063 0.92421146 1.11172979 0.85903813\n",
+ " 0.93092179 0.85038804 0.72389987 0.83811482 1.08522966 1.15344123\n",
+ " 1.05811808 1.04858699 1.18602912 1.11143411 0.8143762 1.0353329\n",
+ " 1.07940158 1.01631029 1.01519385 1.0912292 1.05080297 1.25230026\n",
+ " 1.08900922 0.79070553 1.1240446 1.09465309 1.17808774 1.03194985\n",
+ " 1.08374749 1.02921053 1.1533667 0.86490411 0.80882295 1.10369415\n",
+ " 1.01595574 0.96359154 0.96155064 1.03399784 1.08004809 0.89753643\n",
+ " 0.80933228 0.91130579 0.93737199 0.64478826 0.63983077 1.06906304\n",
+ " 1.07269409 0.89556035 0.87881538 0.82222404 1.08412663 1.07405611\n",
+ " 1.1609198 1.01921209 0.86552584 0.99198972 1.08774114 1.0038623\n",
+ " 1.04381888 1.17557574 1.10934508 1.0495157 1.00361621 0.98750459\n",
+ " 1.00600281 0.866933 1.22320141 1.15964127 0.9223702 1.01071476\n",
+ " 1.17040306 0.98981133 1.16337759 0.96342768 1.18193461 1.24729479\n",
+ " 0.93252344 0.93744735 0.89354138 0.90250084 0.97673168 0.71493482\n",
+ " 1.14241983 1.29748264 1.04069007 1.07716713 1.18216273 1.27673852\n",
+ " 1.05450868 0.7930859 0.97051747 1.05162784 1.10123659 1.20944247\n",
+ " 1.17396433 1.13823873 0.93590913 1.13667177 0.90678173 0.99935817\n",
+ " 1.017066 1.19018696 1.01791996 1.01858701 1.12630577 1.20055061\n",
+ " 1.07959903 1.02183581 0.98886601 0.95957267 1.09341253 1.12347024\n",
+ " 0.8995329 0.94747279 0.88998613 0.85633952 1.06037074 1.07226954\n",
+ " 1.167153 1.1829139 1.07442234 1.18923222 1.040901 0.99311155\n",
+ " 0.97054662 0.95859464 1.23908463 1.17653314 1.16161771 1.05212636\n",
+ " 1.19685325 1.19860524 1.12264061 1.12272311 1.11960892 1.07556301\n",
+ " 1.16410659 1.2426215 1.17736136 1.18240592 0.99457873 1.07010627\n",
+ " 1.08507805 0.60804181 1.15316131 1.00758689 1.13266134 0.92026177\n",
+ " 0.84988191 1.0884377 1.05789948 1.10768988 0.95437437 1.01191592\n",
+ " 1.07748736 0.97389829 1.05961888 1.16361625 1.13197989 0.76352389\n",
+ " 1.19451366 1.06339147 0.82791458 0.96113329 1.02483405 0.86196949\n",
+ " 0.96858477 1.15501349 1.21126379 0.88359168 0.99246154 1.18522045\n",
+ " 1.24626845 1.06376186 0.88397772 0.86386253 0.86948727 0.81432838\n",
+ " 1.01676635 0.83541524 0.72698901 1.10348465 0.9917887 0.726935\n",
+ " 0.6704033 0.81560704 0.80335049 0.93920711 0.79580004 0.64225906\n",
+ " 0.89948641 1.11563232 1.06917206 0.95443525 0.97173503 1.11851539\n",
+ " 0.95034211 0.86768788 1.10490141 0.98360433 1.11956416 1.07092587\n",
+ " 0.97862622 0.70046117 0.85149563 0.92829699 0.97894423 1.08528323\n",
+ " 0.90993579 0.75658906]\n",
+ "Distances (cosine): [0.87305379 0.92188638 1.10964091 1.00927031 0.91277953 0.76351853\n",
+ " 0.95577331 0.89604419 0.794972 0.97523842 0.92141706 0.91347802\n",
+ " 0.92553099 1.03385599 0.87806024 1.04314092 0.90358162 0.93587011\n",
+ " 0.8601977 0.72684806 0.87189268 0.89277431 0.81435519 0.70403952\n",
+ " 1.15409355 1.18083872 1.1668457 1.15052955 1.15459624 1.02288806\n",
+ " 1.02935228 0.84968761 1.14120797 1.17632339 0.85756496 1.04557821\n",
+ " 1.11232829 0.81956018 0.84660796 1.0175985 0.84172803 0.69740249\n",
+ " 0.64336418 1.0110396 0.89424831 0.92664214 0.85008909 0.64665824\n",
+ " 0.96393216 0.99999916 0.84598545 1.01016859 0.80532136 0.95602702\n",
+ " 0.86843674 1.01042381 0.85683756 0.7077735 0.8944971 0.88039301\n",
+ " 1.13042098 1.20978352 0.8577241 0.87518391 0.95276923 1.13160609\n",
+ " 1.22488992 0.94769587 0.8426951 0.58069002 0.6692809 0.91142602\n",
+ " 0.57506243 1.01284375 1.01354465 0.98413037 0.87525764 0.79996122\n",
+ " 0.86086868 0.77697342 0.81600731 0.8784266 1.09702522 1.0939861\n",
+ " 0.90135922 1.04362861 1.01470519 0.792187 1.05513573 0.99680184\n",
+ " 0.97572777 0.98412073 1.0619824 1.0562335 0.90146452 1.02031978\n",
+ " 0.86964554 1.02522917 0.72419191 1.01134527 0.94735918 0.99067358\n",
+ " 1.00375463 0.85546666 1.21158773 0.82921225 1.13043585 1.22443928\n",
+ " 0.70939298 0.88719338 0.93792206 0.97469568 0.983875 0.91482246\n",
+ " 0.96100239 0.84527578 0.91937779 0.9840862 0.82557912 1.12079413\n",
+ " 1.14925562 0.84066792 0.57027969 0.84071859 1.00337804 0.99784433\n",
+ " 1.02290304 1.10867573 0.96311733 1.11357673 1.1462676 1.03603198\n",
+ " 1.13138332 1.05201547 1.1190262 1.01230234 1.04833681 1.05807128\n",
+ " 0.93470885 1.09918985 0.98297562 1.10190927 1.03616786 0.9409892\n",
+ " 0.91581863 0.97093109 0.88648859 0.94392895 0.92131744 0.99960814\n",
+ " 1.11194778 1.00402307 1.09605958 1.01892813 1.1857783 0.98046061\n",
+ " 1.05058251 1.05276888 1.05823415 0.96589646 1.13139313 1.13420731\n",
+ " 1.1021916 0.87553887 0.85132516 0.87677475 1.15839027 0.98635765\n",
+ " 1.10405201 1.07118413 0.98175021 0.98153933 0.95209845 0.97097428\n",
+ " 0.91469509 0.93631047 1.03589345 0.9223157 1.10301058 0.84372179\n",
+ " 0.92405887 1.01700208 0.85560625 0.66376932 0.62879869 1.17535327\n",
+ " 1.17288113 1.12936734 0.85630628 0.9935012 0.79078083 0.83314447\n",
+ " 0.69793571 1.1177419 0.87474965 0.95546112 0.7140535 0.78362202\n",
+ " 0.93636762 0.88706518 0.98854831 0.53318995 0.9112592 0.9089143\n",
+ " 0.75849555 0.68877587 0.88360259 1.0160993 1.01399431 0.82711715\n",
+ " 0.85090735 0.91536395 0.77050502 0.81631043 1.07070628 0.98872353\n",
+ " 1.01001572 0.97150056 0.84340552 0.99123647 0.91783032 0.92315487\n",
+ " 1.04613481 0.86962825 0.78006934 0.83122908 1.00513335 1.15972018\n",
+ " 0.8429556 0.95364582 0.61769342 1.05107173 0.57762401 0.78019883\n",
+ " 0.8785505 1.05474182 0.81710598 1.06734876 1.03190654 1.050957\n",
+ " 1.02658854 0.84666405 1.0093965 0.94093656 1.00778426 1.1074735\n",
+ " 0.98381487 1.0724729 0.99977569 0.97708577 0.923541 1.13391103\n",
+ " 1.02111798 0.93046956 0.97238313 0.95116562 0.91564976 1.00064709\n",
+ " 1.02093509 0.94068727 0.86237653 1.00018309 0.77847054 1.09295294\n",
+ " 1.01678288 0.80845244 0.71169169 0.88203131 0.9863562 0.82288704\n",
+ " 0.84434346 0.95841932 1.04595906 0.91983848 0.9188853 0.94738375\n",
+ " 0.59065689 0.84913679 0.80816558 0.62399018 0.57594035 1.11249105\n",
+ " 0.79668246 0.98954696 0.73444014 0.92594717 0.98615711 0.65543418\n",
+ " 0.73419254 1.07349907 1.09006532 0.94969844 0.90211171 0.67909127\n",
+ " 0.972697 0.96793238 0.75460412 0.95481894 0.94084261 0.72800249\n",
+ " 0.81684544 1.20408444 0.88243765 0.91681268 0.91178225 1.02867303\n",
+ " 0.72027834 0.73994017 0.86507523 0.84182556 0.72280821 1.00716482\n",
+ " 0.7813304 0.77786768 0.92744098 0.85386407 0.93780078 0.93450547\n",
+ " 1.09329797 0.97965232 0.93129151 1.04902224 1.14356557 0.74093314\n",
+ " 1.12139284 0.73946144 0.90542022 0.72771143 1.04499595 0.99297089\n",
+ " 0.8513203 0.9776011 0.95937969 1.01517576 0.86413594 0.96717272\n",
+ " 0.83557305 1.07126071 1.03054722 1.0613062 1.02719786 0.96788921\n",
+ " 0.86524413 1.18237412 0.87348863 0.91545788 0.85860723 1.03962687\n",
+ " 1.11498402 1.08390092 1.10431583 1.01675099 0.79878511 1.13491973\n",
+ " 0.73188063 0.8455133 0.51351895 0.87142331 0.81459734 0.98342172\n",
+ " 0.97600834 0.48404321 1.06702925 0.92098292 0.73439884 0.81202441\n",
+ " 0.86378087 0.8029987 0.83480071 0.93941543 0.70800905 0.77485544\n",
+ " 0.87576174 0.73419887 0.93942517 0.85130773 0.91783885 1.02111565\n",
+ " 0.93768646 1.0693141 1.16333718 0.95295644 1.0527556 0.83745164\n",
+ " 0.98366661 0.94934376 0.89209662 0.95559417 0.85204521 0.60885469\n",
+ " 1.070234 1.06721 1.03532569 1.05480828 0.99881521 0.8012916\n",
+ " 1.14323032 0.91441495 1.10883449 1.04013935 1.20607014 1.11743984\n",
+ " 0.90860225 1.07648507 1.11825964 0.92849238 0.65810364 0.885797\n",
+ " 0.70967654 0.91669225 0.98230321 1.12278505 0.92942713 0.87650561\n",
+ " 0.97783624 0.91096721 0.98055912 0.76593545 0.71370887 0.6819643\n",
+ " 1.01936116 1.16272168 1.03158326 0.99730534 0.86422834 0.99842231\n",
+ " 0.71714957 0.76988248 0.80245735 1.15759927 0.84638889 0.98602918\n",
+ " 1.03589439 1.00618663 0.88795536 1.01789816 1.03019386 1.06129106\n",
+ " 1.18554105 1.06752199 0.94145773 1.03386674 1.07043105 1.08696959\n",
+ " 0.83686705 1.03079766 0.98600526 1.00050079 1.12573855 0.84908867\n",
+ " 1.15602068 0.96037746 0.81744958 1.02913338 0.75246943 0.97680999\n",
+ " 1.06694479 0.8708394 0.9573243 1.03692169 0.82906066 0.91166574\n",
+ " 0.90316753 0.8377321 0.74793776 1.04323352 0.73062325 1.07208996\n",
+ " 0.97261617 0.9763269 1.00007634 0.76443776 0.90001959 0.98829049\n",
+ " 1.05536892 0.85672581 0.88134835 0.89184656 0.76474907 0.52445101\n",
+ " 1.08993664 0.89805219 1.07821231 0.74792553 0.97528409 0.95903345\n",
+ " 0.78511278 0.70716597 0.95115371 0.84988213 0.93965363 0.80432173\n",
+ " 1.02343944 1.01158008 1.00014353 1.10379588 0.80695574 0.99468709\n",
+ " 0.9853186 0.90415822 0.79189411 0.9472948 1.01916826 0.92256528\n",
+ " 0.83215536 0.85473224 0.96014077 0.96279196 0.97937331 1.09123444\n",
+ " 1.00141688 0.8586851 0.94895783 0.91671689 0.86506038 0.91685519\n",
+ " 0.98937466 1.00141994 0.88197692 0.91925865 0.82800563 0.99232152\n",
+ " 0.85500613 1.06178381 1.15467249 1.033186 1.10492143 1.2063439\n",
+ " 1.01802737 1.24300977 1.25272725 1.14620797 0.88937241 0.68586874\n",
+ " 0.64438791 0.99760918 1.18024627 1.0070797 1.07223295 0.9562927\n",
+ " 1.07629201 1.02175338 0.92197925 0.88499438 1.0098337 1.0526718\n",
+ " 0.96735408 0.53145849 0.80276512 1.04651752 0.97209258 0.76439374\n",
+ " 0.81577401 0.86905004 0.86286669 0.76308561 0.83980759 0.71727065\n",
+ " 1.04954725 0.87153048 0.7838484 0.9176393 0.76039787 1.1167635\n",
+ " 0.74122103 1.12492674 0.87911194 1.04390272 1.09548012 1.08954162\n",
+ " 1.1005875 0.96698949 0.93585513 1.10773671 0.84880341 0.88412785\n",
+ " 0.91236299 0.94670779 0.54057727 0.77007672 0.97737834 0.9210275\n",
+ " 0.85361226 0.89533784 0.74488914 1.2209995 0.73041405 0.70377233\n",
+ " 0.91471134 0.75912198 0.93797381 0.89656991 0.57592883 0.91659749\n",
+ " 0.81583877 0.96728831 0.95159157 0.82056786 0.84052262 0.86088422\n",
+ " 1.13699011 1.05006497 1.0835101 0.99084507 0.94382648 1.15837877\n",
+ " 0.70257778 0.99223628 1.07253359 0.95474302 0.88414868 1.13538438\n",
+ " 0.92086584 0.52098149 1.10947306 0.86383603 0.98453589 0.91980584\n",
+ " 0.9794295 0.95321669 0.91434376 0.89442409 0.80853004 0.79286018\n",
+ " 0.9212393 0.94564519 0.76343841 0.91216478 0.91379791 0.91572645\n",
+ " 0.91650522 1.01703703 0.96955736 1.07443054 0.89513236 0.67374208\n",
+ " 0.68661692 0.68778005 0.68828047 0.4513123 0.89804119 0.96278955\n",
+ " 1.03298267 1.04320317 1.08934673 1.11959835 1.0920206 0.69222218\n",
+ " 0.58834938 0.6514184 0.89783974 0.97300552 0.96230626 0.98893337\n",
+ " 0.96572565 0.90166317 0.8900099 1.07975706 0.8670314 0.89416673\n",
+ " 0.82507204 0.92799422 0.90850361 0.90906859 0.8632283 1.03008551\n",
+ " 0.85548517 0.72650441 0.83310785 0.56523309 0.73956873 0.65958911\n",
+ " 0.60872293 0.59206699 0.55207605 0.69466705 0.64329664 0.79891409\n",
+ " 0.88649738 0.40988109 0.69232309 0.6928725 0.58083218 0.45271609\n",
+ " 0.69991926 0.80975737 1.18652812 1.00406657 0.95688882 0.88437406\n",
+ " 0.96081691 0.89072634 0.7472389 0.85757252 0.86936642 0.84703002\n",
+ " 0.73760414 1.1563862 0.82755653 0.89650233 0.9514573 0.73569208\n",
+ " 0.65579139 0.77822016 0.66345922 0.95236423 1.17516738 0.90904011\n",
+ " 0.80435753 1.14731959 0.84174958 0.93760161 1.05460773 0.9373178\n",
+ " 1.09264623 0.79324683 1.05347834 0.94208993 1.06613501 0.96750374\n",
+ " 0.71402425 0.59995847 0.97689021 1.08359431 0.9742434 1.00554383\n",
+ " 1.07059112 1.01876387 0.97681089 1.02678879 1.06284573 1.00626847\n",
+ " 0.66106635 0.86408993]\n",
+ "Distances (cosine): [1.01421002 1.07077005 0.96594289 0.95000596 0.93541515 0.91584817\n",
+ " 1.10235789 0.98789354 0.95672152 1.13044422 1.17682692 1.1313685\n",
+ " 1.11347398 1.07404553 0.91428485 0.99998853 0.85286168 1.06301463\n",
+ " 0.9042512 0.74559444 0.88334968 1.10095713 0.94990308 0.72190936\n",
+ " 1.04197876 1.26943121 1.25313551 1.16132919 1.149733 1.09759273\n",
+ " 1.05385586 0.97760032 1.19306924 0.89137526 0.82278112 1.10918612\n",
+ " 1.1552922 0.93129301 0.8070838 1.05320638 0.88975955 0.74838447\n",
+ " 0.90379122 1.10079649 1.03759931 1.06441512 0.91374018 0.89799835\n",
+ " 1.12001878 1.12568702 1.02814114 1.00965223 1.03077759 1.12487169\n",
+ " 0.89924438 1.0375218 0.82881765 0.74997263 1.02629928 0.9143763\n",
+ " 1.05566307 1.22149654 0.9382606 0.81491388 0.96719048 0.93176536\n",
+ " 1.0310118 1.15458284 0.94428458 0.89323419 0.81143344 1.05221236\n",
+ " 0.69612361 1.02231484 1.06812633 1.04262615 0.95776369 0.97006009\n",
+ " 0.74432267 0.78653623 0.79066271 0.9086223 1.03392748 1.16493876\n",
+ " 0.85881669 1.05501108 1.00985656 0.92049043 1.03236735 1.00606309\n",
+ " 0.95556721 0.93928631 0.87454242 0.87921475 1.02666891 0.95239818\n",
+ " 1.03699923 1.07272994 1.03851969 1.05823531 1.06117864 1.09019676\n",
+ " 1.08201718 1.03136059 1.17390947 0.65324844 1.04656484 0.94680156\n",
+ " 0.78535461 0.92627034 0.83182643 0.91454836 1.01546203 1.08613179\n",
+ " 0.85707615 0.89915109 0.99945264 0.99790772 0.75359248 1.11661919\n",
+ " 1.13690042 0.94200297 0.66797856 0.7792418 0.90760114 1.1652812\n",
+ " 1.01076802 1.00083519 1.02135328 1.07008462 1.0465478 1.03141616\n",
+ " 0.93089314 1.02566364 0.91768844 1.00117691 1.06147185 0.96244662\n",
+ " 0.94202731 1.14518485 0.88701054 1.02602909 1.12298608 0.89767381\n",
+ " 0.80849978 0.75235089 0.87350399 0.82984094 0.98612795 1.03116154\n",
+ " 1.02569068 1.19009324 0.94705876 0.89758241 1.06794324 1.1173812\n",
+ " 1.00002239 0.95118379 1.0819254 1.03390323 1.18389165 1.22619294\n",
+ " 1.07125352 1.03673406 1.13721381 1.14358808 1.03921018 1.10929593\n",
+ " 1.15356524 0.94299601 0.93255633 1.0354042 0.99992875 1.22094772\n",
+ " 1.04773202 0.99620774 0.95986149 1.00901921 1.05818082 0.85289335\n",
+ " 0.9805151 0.87018584 0.89484673 0.72701963 0.60590436 1.25597944\n",
+ " 1.02992865 1.19245666 0.85618525 0.92386848 0.99500744 0.89953438\n",
+ " 0.92721279 1.13542935 0.99860195 0.86375199 0.98436443 0.8571092\n",
+ " 1.02477788 0.86434522 0.98546864 0.54109603 0.91179237 1.17826994\n",
+ " 1.01493297 0.80387181 0.96187848 1.12834705 0.73258123 0.95054157\n",
+ " 0.95886944 0.82537672 0.95843735 0.90205049 1.1389638 1.02918902\n",
+ " 1.13644089 1.00706689 0.85336445 0.98860358 0.92251491 1.00177095\n",
+ " 1.11650379 1.01275352 0.89363632 0.92761628 1.04450738 1.17055077\n",
+ " 0.52432958 0.78422291 0.63371012 0.88070685 0.77349489 0.76910225\n",
+ " 0.80331347 1.0364185 1.02270963 1.038215 1.02398675 1.08763204\n",
+ " 1.07846679 0.79504672 1.00124499 0.93957569 0.88239396 0.93534626\n",
+ " 0.92568717 0.94234442 1.04472507 1.10432267 0.86822146 1.13413591\n",
+ " 1.18371492 0.84757253 0.91152468 0.8497535 0.95699655 0.83152117\n",
+ " 0.99178947 0.98940031 0.90993428 0.96007603 0.93210841 0.90804934\n",
+ " 0.82659429 1.06024528 0.97579451 1.07108973 1.08036627 0.79050503\n",
+ " 0.82850289 0.90130659 1.17613077 0.96398701 1.00363551 1.1321008\n",
+ " 0.97132952 1.03844101 1.03351224 0.60551334 0.78085424 1.01640911\n",
+ " 1.01448868 1.01384401 1.06170593 1.15045849 1.19076579 0.90208136\n",
+ " 1.02779955 1.09864822 1.10792748 0.96750283 0.95959741 0.93257905\n",
+ " 1.03840682 0.93295935 0.86031756 0.95326759 0.97571473 0.81369039\n",
+ " 0.91639636 1.14217286 1.04321351 1.0297567 1.01640227 1.00399503\n",
+ " 0.88814263 1.00590476 0.98255543 0.99304061 0.81652194 1.03879372\n",
+ " 0.90157022 0.92704159 1.01558832 0.94303023 1.0617102 0.83778088\n",
+ " 0.92947139 0.85321749 1.04188043 1.10544022 0.94910453 0.93977329\n",
+ " 1.03304495 0.73600388 0.91251683 0.70609508 1.08298935 0.98619713\n",
+ " 0.95332587 0.91133852 1.02360999 0.89181471 0.94127942 0.92755766\n",
+ " 0.84427062 0.80308185 0.99208308 0.96888637 0.90540487 0.9704813\n",
+ " 0.9556628 1.08476948 0.98662139 1.08956675 0.97182913 1.13644314\n",
+ " 1.27332452 1.20308043 1.2421625 1.05334428 0.81667924 0.97116698\n",
+ " 0.63704335 0.73666235 0.77787665 0.90978332 0.89015694 1.12392825\n",
+ " 1.14062722 0.75033401 1.09300461 0.9408243 1.00381744 0.74848446\n",
+ " 0.96713376 0.86305515 1.13419239 0.96432476 0.77037546 0.84067363\n",
+ " 1.01825012 0.94936511 0.81483043 0.96499299 0.99398247 1.03000178\n",
+ " 0.91531934 0.96068388 1.06621781 1.09488141 0.86857553 1.07615135\n",
+ " 1.14380331 1.05312301 0.93004361 0.67851594 0.78098376 0.70293232\n",
+ " 0.99674313 1.08279052 0.896288 1.18405943 0.90918565 0.92743257\n",
+ " 1.20862071 0.96499365 1.01244794 0.99499359 1.04211016 1.09437652\n",
+ " 0.81155586 1.0487013 1.06532539 0.9075846 0.63002966 1.10142332\n",
+ " 0.89745108 1.05955488 0.97285254 1.17514401 0.79508738 1.02933105\n",
+ " 1.02489202 0.97172641 0.85938453 0.85515589 0.83397013 0.79571494\n",
+ " 0.86277216 1.19613552 1.09584571 1.24039174 1.04764324 1.18246217\n",
+ " 0.89224625 0.94691471 0.87181621 1.24564567 0.99619212 0.98045941\n",
+ " 1.06649087 0.96221909 1.08993609 1.02991318 0.93683735 0.97603669\n",
+ " 1.02013848 0.95627047 0.83133649 0.96387518 0.93777374 1.05786982\n",
+ " 0.76992705 0.86717048 0.98895409 0.78083838 1.03101534 0.87823277\n",
+ " 1.15278151 1.08899768 1.03725794 1.04965223 0.67227334 0.87647351\n",
+ " 1.02318319 0.92246892 1.16992703 1.24351822 0.85151466 0.99091382\n",
+ " 0.8558565 1.01948401 0.84946348 0.9439263 0.83183478 1.18141189\n",
+ " 1.16745156 1.08540768 1.15561904 0.89934233 0.97343327 0.98482588\n",
+ " 1.19028892 0.92623536 0.99067947 0.89845048 1.01340538 0.74883602\n",
+ " 1.01493956 0.97937797 1.05470955 0.91764394 0.93633508 1.06238751\n",
+ " 0.94549371 0.83888775 0.9070468 1.05510956 0.90462685 1.0087438\n",
+ " 0.97107829 1.01522246 0.9302662 1.15726902 0.80284027 1.19252622\n",
+ " 1.1821632 0.99839991 0.9149796 1.11836941 1.02756323 1.0182542\n",
+ " 0.87873097 0.91207395 1.12175608 0.84843169 0.96426943 1.02251518\n",
+ " 0.99960288 0.97741411 0.9949037 0.87435148 0.89602769 1.02221796\n",
+ " 1.08462531 1.05837634 0.99632494 1.09367002 0.9095804 0.9188991\n",
+ " 0.85381149 1.06659725 1.159693 1.09341479 1.1403699 0.89690222\n",
+ " 1.04092955 1.01145248 0.8841497 0.96721476 0.91119218 0.66088694\n",
+ " 0.61161225 1.07549558 1.1109724 1.11380774 1.10473141 1.03202546\n",
+ " 0.93158237 0.99992473 0.95661108 0.85383709 1.04085812 0.87950328\n",
+ " 0.95318781 0.7419186 0.74702882 0.92180698 1.04993126 0.89625706\n",
+ " 0.93371683 0.99358931 1.06869572 0.91614501 0.86268864 0.96742496\n",
+ " 0.90234201 1.08079229 1.01990914 0.9999955 0.99433874 1.16764531\n",
+ " 0.84518952 1.13310209 0.84835348 1.08407546 1.09726991 0.76191091\n",
+ " 0.8213361 0.86287135 0.95555861 0.90950468 0.87899726 0.96723498\n",
+ " 0.91575199 0.98808816 0.776629 0.95730194 0.96463284 0.91534593\n",
+ " 1.07197091 1.01621825 0.87348515 1.25385266 0.69540447 0.76464481\n",
+ " 0.82080954 0.65302985 1.08651262 0.77160154 0.68150335 0.83261548\n",
+ " 0.92683973 1.08051067 1.01852538 0.98614617 0.94455482 1.00702254\n",
+ " 1.00008084 0.93580731 1.0616711 0.83148347 0.89916151 0.90136532\n",
+ " 0.86074004 1.04695182 1.11714621 1.00615707 0.96007462 1.13313071\n",
+ " 1.08832412 0.68294223 1.16511034 0.95802863 0.93620386 0.971089\n",
+ " 0.92962849 0.91312085 0.94873158 0.89717467 0.90195928 0.90336084\n",
+ " 0.97513478 0.94689992 0.95390285 0.92717124 1.05760276 1.05877296\n",
+ " 1.02209879 0.94824176 0.95708108 1.1078558 0.97412374 0.71985515\n",
+ " 0.92989838 0.93398248 0.83825691 0.5599142 0.84831357 1.10500717\n",
+ " 1.10366733 1.14147034 0.99173896 1.04376495 1.26199013 0.66706478\n",
+ " 0.85287327 0.9593587 1.11194832 1.05604997 1.08785454 1.01055063\n",
+ " 0.97724822 0.8655906 0.89004955 1.10082832 0.95885093 0.96468762\n",
+ " 0.84384673 1.03736174 0.95476951 1.0261436 0.78837881 0.97450544\n",
+ " 0.83222128 0.62927318 0.82400375 0.52147976 0.69180839 0.82185185\n",
+ " 0.11017399 0.76714388 0.64540427 0.63702853 0.52904967 0.79385648\n",
+ " 0.79439012 0.71529433 0.72900929 0.68235315 0.67368606 0.35477998\n",
+ " 0.63049083 0.62150452 0.92127596 1.02990859 1.0329471 0.80957491\n",
+ " 1.03038357 0.87454645 0.70912277 0.84142239 1.05899013 0.88262278\n",
+ " 0.76246407 0.9873167 0.77268437 0.8730037 1.0975705 0.76374562\n",
+ " 0.90106238 0.9478527 0.82381044 0.97538538 1.16672601 1.00387179\n",
+ " 0.74260872 0.91008338 0.69360421 0.99719019 1.03310168 1.10513327\n",
+ " 1.10160857 0.77715861 1.01529971 1.09366493 0.92896896 1.04631665\n",
+ " 1.01239397 0.6195239 0.93534496 1.11531855 0.93347552 0.75444392\n",
+ " 0.86356271 1.14515802 1.02080894 1.02311573 1.17333291 1.23211226\n",
+ " 0.80074894 1.00174833]\n",
+ "Distances (cosine): [0.56706097 0.82212825 1.11119101 0.8025749 0.97192001 0.85489242\n",
+ " 1.14861313 0.90331888 0.91087433 0.89739816 1.01851144 1.03960711\n",
+ " 0.9941434 1.14155222 0.90141778 1.10001179 0.82344234 0.9888407\n",
+ " 0.74681057 0.92826909 0.83206778 0.94459442 0.75282543 0.84809843\n",
+ " 0.96959215 0.8840922 0.90431491 1.15507461 1.06514991 1.04427557\n",
+ " 1.01973571 0.84682324 0.97569836 0.89366771 0.80268255 0.94478583\n",
+ " 0.94100584 1.06180729 0.58561159 0.90193016 0.41820455 0.9551578\n",
+ " 1.00491652 0.83030135 1.0377206 1.1575918 0.96893882 1.16167905\n",
+ " 1.20828369 1.16257162 0.91962359 1.10231016 0.92757612 0.89512278\n",
+ " 0.96955355 0.93041152 0.90162324 0.9527817 0.76848326 0.48966466\n",
+ " 0.86061035 1.13331422 0.97481349 0.886571 0.78527874 0.95303368\n",
+ " 0.95666417 1.02600483 0.90057216 0.91002956 1.01379747 0.97511193\n",
+ " 0.84336916 0.68988944 0.84547021 0.90344587 0.83087055 0.33999791\n",
+ " 0.9007677 0.5390222 1.00278156 1.03536297 1.01225035 1.00469972\n",
+ " 0.86186795 1.05146677 1.01622699 0.98893389 0.83968909 1.06868243\n",
+ " 0.94167384 0.88611483 0.93030028 1.1687421 0.96209071 1.1016885\n",
+ " 0.96585165 1.05447136 0.80536147 1.15755929 0.96007734 0.91380652\n",
+ " 1.02122439 1.12152322 1.21230787 0.49348303 1.04499198 1.07358548\n",
+ " 0.93052967 0.9709419 1.09409566 1.05450829 0.98913823 0.89765347\n",
+ " 0.91508746 0.96242973 0.88718822 1.02872054 0.89191007 1.00960488\n",
+ " 1.04973753 0.99105496 0.90390632 0.66118543 1.02625044 1.08645022\n",
+ " 0.96288495 0.8320383 0.97060037 0.9844958 0.82788785 0.86946092\n",
+ " 0.80185347 0.91297831 1.11152386 0.90043332 1.08231523 0.89187363\n",
+ " 0.9707835 0.9810151 0.76178317 0.75715313 0.85458671 0.88828671\n",
+ " 0.86503092 0.79816531 0.85645629 0.79087102 1.01025291 1.18566753\n",
+ " 1.06510965 1.00854764 1.05474871 1.00366898 1.17488463 1.07922893\n",
+ " 1.10061414 1.02594392 1.0516511 1.06164214 1.10844121 1.06208269\n",
+ " 1.06616495 1.03820977 1.02655921 1.0706701 1.11518671 1.0761752\n",
+ " 1.18831868 1.10447912 1.03666651 1.12356661 1.09953036 1.04328803\n",
+ " 0.84861062 0.87405433 0.98473717 0.98411212 1.00977045 1.04698502\n",
+ " 1.01471253 0.83585106 0.90115049 0.92326746 0.65899413 1.14802123\n",
+ " 0.96341565 1.05815199 1.04236559 1.05669731 1.05420872 0.70151181\n",
+ " 0.91412378 0.895196 0.78315192 0.96712862 0.96955964 0.89893096\n",
+ " 0.8154264 0.82966153 0.83722747 0.72121683 0.94974512 1.16031757\n",
+ " 0.93370404 0.94752727 1.15072718 0.93443897 0.86640799 0.76140401\n",
+ " 0.94660395 0.98821094 0.97740921 0.95572064 1.14657135 0.80491785\n",
+ " 1.00780141 1.01159236 1.01818388 1.14279455 1.0504149 1.09970829\n",
+ " 0.96683253 1.14145487 1.01573219 0.93517987 1.04698298 1.06574635\n",
+ " 0.9174211 1.09476264 0.84642148 1.03043142 0.56688191 1.02755749\n",
+ " 0.80932971 1.01287179 0.86387673 0.80086569 0.76034106 1.00188086\n",
+ " 0.98330831 0.72642317 1.17505958 0.9231977 0.74207911 1.04430208\n",
+ " 0.96871996 0.91453616 1.02531398 1.1327503 0.92713711 1.08055921\n",
+ " 1.14337151 0.63577468 0.61560675 0.63174408 0.77421207 1.03440505\n",
+ " 0.96178377 0.98684455 1.01069043 0.88273346 0.92437907 0.91068833\n",
+ " 0.93993799 0.84901402 0.94871563 0.9839807 0.75190997 0.96446006\n",
+ " 0.39580773 0.70610975 1.07324116 1.03972023 0.62819504 1.21650727\n",
+ " 0.89747302 0.86003207 0.8452655 0.90554354 0.92201384 0.81790851\n",
+ " 1.10311079 1.14054661 1.04387008 1.14498186 1.02085092 0.79485105\n",
+ " 1.0210695 0.96678221 1.01999672 1.17582928 0.87191735 0.95536443\n",
+ " 0.98112976 0.95960532 0.91675486 0.48340027 0.58547944 0.95285562\n",
+ " 1.03262314 1.09591663 1.08618892 1.20274529 1.03236248 0.77176066\n",
+ " 0.79883669 1.01430101 0.88848731 0.71389627 0.89862181 0.99384423\n",
+ " 0.60729525 0.83641827 0.95742253 0.87465912 0.98144236 1.12039696\n",
+ " 1.08206896 0.82486538 0.92821735 0.95675842 1.00537456 0.99637331\n",
+ " 1.0219535 0.65359423 0.98317444 0.86164263 1.030309 0.96349382\n",
+ " 1.07304414 1.11618554 0.86879094 0.94100357 1.00141739 0.83456913\n",
+ " 1.0195228 0.95521872 1.05268532 0.96194298 1.02954507 0.77458228\n",
+ " 0.7327787 1.1106052 0.93219708 1.00545355 0.92097132 1.09333807\n",
+ " 1.10578046 1.12762976 1.05906461 1.14345285 0.60128273 0.83649392\n",
+ " 0.53592592 0.85471485 0.85727239 1.00622163 0.81352581 0.85540667\n",
+ " 0.90328878 0.78170918 0.97396397 0.55872487 1.01302282 0.82764597\n",
+ " 1.00544244 1.04737124 1.04419222 1.02681514 0.77268162 0.87638183\n",
+ " 1.01729634 1.01038678 0.7857973 0.76457304 0.78632252 1.00129976\n",
+ " 0.93883056 0.79994259 1.16675829 0.8194328 0.92009691 0.99059313\n",
+ " 1.04436691 0.80291934 1.06908349 0.95093185 0.84360165 0.83927486\n",
+ " 0.85698585 0.66401842 0.91969913 1.0431957 1.08736191 0.79652925\n",
+ " 1.02299838 0.78318327 0.91799597 0.82792854 1.06592225 0.9172317\n",
+ " 1.00384614 0.9977803 1.03434373 0.93508444 0.87081305 1.13208582\n",
+ " 1.05529849 0.98475901 0.81846145 1.09985665 1.02662107 0.94085752\n",
+ " 1.09386589 0.95795127 0.84781994 0.7333165 0.93242418 0.85749164\n",
+ " 0.88419709 1.09398442 1.19350441 1.06942284 0.86713553 1.10722028\n",
+ " 1.04351246 0.95089017 1.03997491 1.16117756 1.15281189 0.9323813\n",
+ " 0.98735978 0.91952159 0.87943963 0.93954764 0.91345122 0.89631903\n",
+ " 0.99915507 0.86971601 0.82593144 0.7982662 0.85422708 0.9667324\n",
+ " 0.92981673 0.96037634 1.08780012 0.77866837 0.89718166 0.95977864\n",
+ " 0.95665853 1.00411929 0.92896315 0.90276323 0.78958691 0.79988992\n",
+ " 0.91085182 0.96278792 1.11608588 1.08583426 1.02601674 0.99967455\n",
+ " 0.86742896 0.86785954 0.95867451 1.03448778 0.89492161 1.11519812\n",
+ " 0.99044153 0.8032279 0.99573059 0.99736843 1.11718126 1.10893809\n",
+ " 0.91764254 1.07144824 0.91677664 1.08635095 0.96073935 0.84565667\n",
+ " 1.12474471 1.11257326 1.14854123 1.02461483 0.97037398 1.02438511\n",
+ " 0.83325551 0.99049576 0.98067024 0.84589078 1.00408083 0.77514052\n",
+ " 0.70053144 0.77964524 0.82221173 0.85468024 0.74594541 1.13188377\n",
+ " 1.10441471 1.11355709 0.89050676 1.12608295 0.99680972 1.17192754\n",
+ " 1.05165017 0.8023706 0.96989638 0.67172585 0.83678269 0.81195707\n",
+ " 0.9553532 1.10683178 1.06272586 0.8580761 0.68881852 1.05588548\n",
+ " 1.17484603 0.96147414 0.9788587 1.05814354 1.04072202 1.02422347\n",
+ " 0.87836934 1.01063262 1.12845065 0.98254377 0.99486538 1.02849547\n",
+ " 1.00470511 1.18470417 1.10453359 1.08019333 0.95049096 0.90613873\n",
+ " 0.70222271 1.09381703 0.87240909 1.03925432 1.13585133 0.73246705\n",
+ " 0.39475797 1.07720281 0.99771844 1.01989555 1.03730726 1.01158369\n",
+ " 0.51305422 0.90480117 0.85423219 0.89843301 0.86556896 1.01406515\n",
+ " 1.05364801 1.02602234 0.95240929 0.98652336 0.83837425 1.03846629\n",
+ " 0.49914265 0.95634356 0.9266835 1.03006505 0.89777976 1.21923873\n",
+ " 0.90595424 0.95413491 1.0683444 1.07279044 1.11652209 0.91859676\n",
+ " 0.96188245 0.91873966 0.92196043 0.8872636 0.40538171 0.95401647\n",
+ " 0.92813198 1.05562435 0.81189793 1.01039303 0.91179031 0.33619683\n",
+ " 0.67421358 1.01196161 0.96739479 1.33933325 0.89942979 0.91415999\n",
+ " 0.94875529 0.89153728 0.89608998 0.96241334 0.89650858 0.930984\n",
+ " 0.77174474 1.04979684 0.94253265 1.04147305 0.92809325 0.98269462\n",
+ " 1.04353906 0.9642066 1.0944907 0.93020661 1.0156857 1.07081341\n",
+ " 0.68065332 0.82251706 0.98487342 0.72087942 0.68806895 0.93118888\n",
+ " 1.10284038 0.8314337 1.22696603 1.15890645 0.82364241 0.94209504\n",
+ " 0.93601577 0.88376627 1.09834846 0.84905004 1.04538741 0.65747376\n",
+ " 0.94476483 0.57224291 0.84547182 1.11025435 1.12674861 1.07664702\n",
+ " 1.05432301 1.13812773 1.17453089 1.16646877 1.06711601 0.99928977\n",
+ " 0.75375513 0.87712715 0.92613822 0.87232553 1.08271649 1.19657827\n",
+ " 1.18730882 1.12355287 0.92409291 0.86854405 1.04320894 0.95783165\n",
+ " 1.08911796 0.98543132 1.16160252 0.924158 0.92937278 1.04553755\n",
+ " 1.09180412 0.95329214 1.00248517 1.14575957 1.04231661 1.07935577\n",
+ " 0.87976354 1.0738666 1.03929065 1.15766744 0.53016505 0.98245035\n",
+ " 1.10816885 0.89769466 0.46896295 0.61994693 0.25177224 0.94149321\n",
+ " 0.79691966 0.59284655 0.72498255 0.94286425 0.73706125 0.40466257\n",
+ " 0.48916059 0.71715084 0.78906761 0.76742211 0.75555709 0.68339005\n",
+ " 0.38268267 0.22474446 1.122836 1.04328393 1.18118839 0.79179384\n",
+ " 0.98134567 0.72829315 0.84669564 0.78723765 0.94099877 0.77215897\n",
+ " 0.86226458 0.99791484 0.99057288 1.02914502 1.03741296 0.97359336\n",
+ " 1.07590738 1.08905354 1.03744072 0.95284078 1.06829615 1.10371385\n",
+ " 0.98201801 1.07481656 0.84123079 1.0025847 1.10424004 1.11079413\n",
+ " 0.76107916 0.68111323 0.70079031 0.927157 0.80227648 0.81733981\n",
+ " 0.80325203 0.8630462 1.00162612 1.05606058 1.00803951 0.920244\n",
+ " 0.51312745 1.01635234 0.95242251 1.0521877 1.06687457 1.1582277\n",
+ " 0.93555897 1.07386927]\n",
+ "Columns: Index(['Country', 'Histologic_Grade_FIGO', 'Histologic_type',\n",
+ " 'Path_Stage_Primary_Tumor-pT', 'Path_Stage_Reg_Lymph_Nodes-pN',\n",
+ " 'Clin_Stage_Dist_Mets-cM', 'Path_Stage_Dist_Mets-pM',\n",
+ " 'tumor_Stage-Pathological', 'FIGO_stage', 'BMI', 'Age', 'Race',\n",
+ " 'Ethnicity', 'Gender', 'Tumor_Site', 'Tumor_Focality', 'Tumor_Size_cm'],\n",
+ " dtype='object')\n",
+ "Topk matches: [{'source_column': 'Country', 'top_k_columns': [ColumnScore(column_name='country_of_birth', score=0.5170051943814018)]}, {'source_column': 'Histologic_Grade_FIGO', 'top_k_columns': [ColumnScore(column_name='tumor_grade', score=0.6650353057554459)]}, {'source_column': 'Histologic_type', 'top_k_columns': [ColumnScore(column_name='history_of_tumor_type', score=0.6613978065162984)]}, {'source_column': 'Path_Stage_Primary_Tumor-pT', 'top_k_columns': [ColumnScore(column_name='uicc_pathologic_t', score=0.6776191475935113)]}, {'source_column': 'Path_Stage_Reg_Lymph_Nodes-pN', 'top_k_columns': [ColumnScore(column_name='ajcc_pathologic_n', score=0.6276434644871174)]}, {'source_column': 'Clin_Stage_Dist_Mets-cM', 'top_k_columns': [ColumnScore(column_name='uicc_pathologic_m', score=0.7378922945435357)]}, {'source_column': 'Path_Stage_Dist_Mets-pM', 'top_k_columns': [ColumnScore(column_name='uicc_pathologic_m', score=0.7113686845189913)]}, {'source_column': 'tumor_Stage-Pathological', 'top_k_columns': [ColumnScore(column_name='ajcc_pathologic_stage', score=0.7664999691570722)]}, {'source_column': 'FIGO_stage', 'top_k_columns': [ColumnScore(column_name='figo_stage', score=0.7460380829626372)]}, {'source_column': 'BMI', 'top_k_columns': [ColumnScore(column_name='bmi', score=0.761187469736605)]}, {'source_column': 'Age', 'top_k_columns': [ColumnScore(column_name='weight', score=0.700687201695624)]}, {'source_column': 'Race', 'top_k_columns': [ColumnScore(column_name='race', score=0.7150201421760489)]}, {'source_column': 'Ethnicity', 'top_k_columns': [ColumnScore(column_name='ethnicity', score=0.7564064760060497)]}, {'source_column': 'Gender', 'top_k_columns': [ColumnScore(column_name='gender', score=0.890303961909301)]}, {'source_column': 'Tumor_Site', 'top_k_columns': [ColumnScore(column_name='tumor_level_prostate', score=0.5901189146773462)]}, {'source_column': 'Tumor_Focality', 'top_k_columns': [ColumnScore(column_name='tumor_focality', 
score=0.8898260142520747)]}, {'source_column': 'Tumor_Size_cm', 'top_k_columns': [ColumnScore(column_name='tumor_width_measurement', score=0.775255538156757)]}]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Country | \n",
+ " country_of_birth | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Histologic_Grade_FIGO | \n",
+ " tumor_grade | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Histologic_type | \n",
+ " history_of_tumor_type | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Path_Stage_Primary_Tumor-pT | \n",
+ " uicc_pathologic_t | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Path_Stage_Reg_Lymph_Nodes-pN | \n",
+ " ajcc_pathologic_n | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Clin_Stage_Dist_Mets-cM | \n",
+ " uicc_pathologic_m | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Path_Stage_Dist_Mets-pM | \n",
+ " uicc_pathologic_m | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " tumor_Stage-Pathological | \n",
+ " ajcc_pathologic_stage | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " FIGO_stage | \n",
+ " figo_stage | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " BMI | \n",
+ " bmi | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Age | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Race | \n",
+ " race | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Ethnicity | \n",
+ " ethnicity | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Gender | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Tumor_Site | \n",
+ " tumor_level_prostate | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Tumor_Focality | \n",
+ " tumor_focality | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Tumor_Size_cm | \n",
+ " tumor_width_measurement | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target\n",
+ "0 Country country_of_birth\n",
+ "1 Histologic_Grade_FIGO tumor_grade\n",
+ "2 Histologic_type history_of_tumor_type\n",
+ "3 Path_Stage_Primary_Tumor-pT uicc_pathologic_t\n",
+ "4 Path_Stage_Reg_Lymph_Nodes-pN ajcc_pathologic_n\n",
+ "5 Clin_Stage_Dist_Mets-cM uicc_pathologic_m\n",
+ "6 Path_Stage_Dist_Mets-pM uicc_pathologic_m\n",
+ "7 tumor_Stage-Pathological ajcc_pathologic_stage\n",
+ "8 FIGO_stage figo_stage\n",
+ "9 BMI bmi\n",
+ "10 Age weight\n",
+ "11 Race race\n",
+ "12 Ethnicity ethnicity\n",
+ "13 Gender gender\n",
+ "14 Tumor_Site tumor_level_prostate\n",
+ "15 Tumor_Focality tumor_focality\n",
+ "16 Tumor_Size_cm tumor_width_measurement"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "column_mappings = bdi.match_schema(dataset, target=\"gdc\", method=\"ct_learning\")\n",
+ "column_mappings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "100%|██████████| 1/1 [00:00<00:00, 15.27it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Table features extracted from 1 columns\n",
+ "Table features loaded for 734 columns\n",
+ "Distances (cosine): [0.62180854 0.95828624 0.97060146 1.06355793 1.05457518 0.95031876\n",
+ " 0.87183141 0.98037726 1.08884842 0.59818588 0.75152265 0.83261097\n",
+ " 0.80213099 0.77332142 0.87859477 1.16856598 0.83399758 1.10548815\n",
+ " 0.95137044 1.08006319 0.84199771 1.13298833 1.02036063 1.12404512\n",
+ " 1.19349639 0.82890662 0.8390033 0.89459161 0.89116058 0.87583417\n",
+ " 0.85418167 0.88809974 0.93308411 1.1451897 0.82541944 1.21806511\n",
+ " 1.22505213 1.12013957 0.84643531 0.97380871 0.9138511 0.8798859\n",
+ " 0.96106713 1.1585152 0.95307048 0.80630147 0.83896481 0.96793791\n",
+ " 0.9969389 0.95977895 1.07777176 1.03366965 1.11084185 1.0039425\n",
+ " 1.02620412 0.9678424 0.97017597 0.99921509 0.2632313 0.87646072\n",
+ " 0.88057175 1.06924909 0.95787864 0.78496797 0.63093806 0.96951553\n",
+ " 1.06129685 1.04936767 0.86532366 0.98907561 0.86559347 0.85613342\n",
+ " 0.88012961 1.02773534 0.90308327 0.87087699 0.28419954 0.38251533\n",
+ " 0.80052387 0.87840672 0.93036599 1.11666263 0.9553454 0.94572933\n",
+ " 0.91917708 1.04233417 1.06448485 0.91170815 1.13248806 1.03661507\n",
+ " 1.05336171 0.92651594 0.97839488 1.13371658 0.99194777 1.08588376\n",
+ " 0.89700062 0.94187908 0.85409067 0.8289185 0.86272491 0.91847914\n",
+ " 1.14620391 1.15113244 1.23869078 0.83359777 1.23599229 1.04599379\n",
+ " 1.05217285 0.96232584 1.16335715 1.06882847 1.0267811 0.70104295\n",
+ " 1.19456603 0.9915321 1.18500914 1.01749104 0.89927694 0.9192603\n",
+ " 0.98327772 0.85248077 0.92227078 0.69725199 0.95593437 1.24535924\n",
+ " 1.14966094 1.05111995 1.06565873 1.13605671 1.03284434 1.06107447\n",
+ " 0.89355102 0.94802372 1.14743307 0.75017295 0.82545174 1.17318279\n",
+ " 0.93035523 0.72175539 0.6335789 0.8988125 0.72257156 0.95169487\n",
+ " 0.88223726 0.79858273 0.82114543 0.83841884 1.0090232 1.06043266\n",
+ " 0.98081056 0.75772897 1.08104732 0.9089776 1.15958148 1.00438092\n",
+ " 1.05438911 1.03252653 0.93247248 0.93432992 0.93471573 0.92823884\n",
+ " 1.05538732 1.13240733 1.0118344 1.01661273 1.01485342 0.96459444\n",
+ " 1.04193056 1.18869527 1.16815771 1.02936492 1.14122272 0.82092251\n",
+ " 0.65157783 0.96625788 0.93758166 0.85583113 0.92212169 0.95416574\n",
+ " 1.09885605 1.05532617 0.95052343 0.85001592 0.90288104 1.02070347\n",
+ " 0.9910173 1.02283995 0.94823304 0.96876862 0.89640495 0.69815509\n",
+ " 0.9166788 1.1669694 0.79707556 1.03583173 0.92546341 0.85519116\n",
+ " 0.96564996 0.78980487 0.86721777 1.11015395 0.86693397 1.23404074\n",
+ " 1.1863865 1.23666438 1.22999959 0.75050197 0.9030325 0.82651668\n",
+ " 0.82787442 0.94635584 1.03702504 0.92719724 1.07221881 1.03686381\n",
+ " 1.15469181 0.89674719 1.08831235 1.13528419 1.08223522 1.018505\n",
+ " 1.02420779 1.03345171 1.03119722 0.99911417 0.957303 1.14088651\n",
+ " 1.03717215 1.08764631 0.82683466 1.23448336 0.88923445 0.85993213\n",
+ " 0.68923161 0.94315235 0.97900752 0.67485393 0.64281205 0.87774833\n",
+ " 0.87393218 0.95165618 1.03133383 0.97942894 0.90210028 1.05086549\n",
+ " 0.95315535 1.10491227 1.03686486 1.06282383 1.0014064 0.91883733\n",
+ " 1.09846382 0.84950593 0.81626292 0.8703657 0.67982803 0.76059613\n",
+ " 1.10172704 0.98763897 0.7122062 0.93786655 0.9868919 0.96458227\n",
+ " 0.79161396 0.96745467 1.10726904 0.99403452 0.99363356 0.93037827\n",
+ " 0.79407271 0.67904483 1.00324363 0.96443237 0.83897343 1.20022918\n",
+ " 0.82448953 0.91247807 0.82581604 1.03821939 0.90239292 1.07105127\n",
+ " 0.92985275 0.9981976 0.86114335 0.8162548 0.86867626 0.72635989\n",
+ " 0.92759527 0.90816387 0.93577216 1.17372644 1.03301394 1.06459649\n",
+ " 0.58874733 1.05991393 0.88987226 0.9104593 0.65704581 1.06682764\n",
+ " 1.02652731 1.12319816 1.02808578 1.08283244 0.92657779 0.73786823\n",
+ " 0.87818219 0.88290965 0.79747457 1.00588162 0.98819968 0.60999417\n",
+ " 0.39757294 0.94248179 1.05642378 0.928897 1.24910293 1.05663498\n",
+ " 1.0849922 0.98459731 1.12373466 1.13791357 1.10705566 0.90268501\n",
+ " 1.18259281 0.86631726 0.97527969 0.68250815 0.83992296 0.72678555\n",
+ " 1.07451745 1.09113936 1.02545058 1.13284515 1.00039577 1.08344863\n",
+ " 1.04318097 0.94725113 1.05362747 0.99910121 1.12756812 0.67115132\n",
+ " 0.50542526 1.13296085 1.02631297 1.19177276 1.13911577 1.17974807\n",
+ " 1.19772706 1.18763178 1.21621899 1.07912013 0.82140562 0.96193096\n",
+ " 0.8272436 0.85481195 0.84613397 0.79956798 0.92009948 0.90890503\n",
+ " 1.03827975 0.92580363 0.89434109 0.78130318 0.94034923 0.79430347\n",
+ " 0.9164524 0.88719378 0.73640769 0.70285096 0.78387729 0.96944138\n",
+ " 1.01315293 1.05315374 0.7780931 0.69413911 0.80136584 0.87129166\n",
+ " 0.79387604 0.98760668 1.24645935 0.78377374 0.79493639 1.10810633\n",
+ " 1.12340137 0.99805872 1.16981475 1.05243879 0.86693227 1.0476084\n",
+ " 0.77760517 0.94823308 0.93462357 0.90584752 0.84839799 0.89676079\n",
+ " 1.11525851 0.94154443 0.84043537 0.62268094 1.06497016 0.90327737\n",
+ " 1.0296448 0.93374101 0.95922439 0.94618057 1.01347379 1.06706465\n",
+ " 1.12790036 0.93442679 1.08746225 1.03502658 0.91520857 0.98867871\n",
+ " 0.9915282 0.82873622 1.14676163 0.83269474 1.13503871 0.89726593\n",
+ " 0.96667056 0.92571072 0.96363185 1.02517711 0.98239794 0.87937194\n",
+ " 1.00824238 1.11325348 1.00435263 0.98525455 0.94852668 0.95222768\n",
+ " 1.01801052 0.97531076 1.10453643 0.86206364 0.88812561 0.89605646\n",
+ " 1.06637241 0.99500623 1.13707918 1.03183272 0.68721036 0.90310395\n",
+ " 0.89058679 0.69576729 0.74826766 0.91040064 0.85889324 0.70173909\n",
+ " 0.6084047 0.830279 0.98717624 0.57557374 0.78053113 0.67174819\n",
+ " 0.78001104 0.9523257 1.15471021 1.24403806 1.14381014 1.049939\n",
+ " 0.88849052 1.04800341 0.79657877 1.01468366 0.8237984 0.58749903\n",
+ " 1.05959324 0.78489375 1.0170763 0.86180766 0.87534372 0.87331439\n",
+ " 1.02821448 1.13117245 0.81871593 0.99382802 1.00961394 0.84659636\n",
+ " 0.94222816 1.15199156 1.17718003 1.05194392 0.91860547 1.12777126\n",
+ " 1.11686885 0.92935512 0.91161761 0.82375209 0.9328047 0.65141167\n",
+ " 0.77248368 0.65954934 0.72527156 0.69849172 0.79237805 1.2424139\n",
+ " 1.22237825 1.05194678 0.79252867 0.7923779 0.93735885 1.07523338\n",
+ " 0.91083628 0.9317345 1.02216282 0.76574201 0.90594523 0.9767441\n",
+ " 0.99759023 1.03441852 1.03821204 0.96799104 0.42307492 0.95620444\n",
+ " 1.01886452 0.98510088 1.03519092 0.68830384 0.72729148 1.02823686\n",
+ " 0.81946974 0.97986967 1.24133806 1.09114148 1.1806313 0.99885346\n",
+ " 0.61270573 1.04293971 1.12306437 1.03413065 1.00955125 1.0732613\n",
+ " 0.89691308 1.15169947 0.99786737 0.84291976 0.90474504 0.66699103\n",
+ " 0.7964573 0.96896622 0.97350175 1.03544537 0.90143008 1.12007117\n",
+ " 0.87817769 0.88706463 1.14025641 0.9589542 1.13425182 0.95753504\n",
+ " 0.98928059 1.17294968 1.00378764 1.00963321 1.03773316 1.09226741\n",
+ " 0.8460751 0.85820255 0.78381528 0.90506101 1.02696406 1.14555015\n",
+ " 1.02025169 0.93679619 1.10541732 0.9925462 1.02271942 0.97546385\n",
+ " 1.04929981 0.82458245 0.57557957 0.87319603 0.78771583 0.85893264\n",
+ " 0.95337789 1.13380847 0.94827243 1.09386854 1.13372751 0.78677396\n",
+ " 0.85439317 1.06781295 1.11221314 1.15904659 1.00060463 0.98173862\n",
+ " 0.95179128 0.81908124 1.03182384 1.05088075 0.99542074 0.93917353\n",
+ " 0.9280782 1.01179936 0.58100818 1.06657814 1.22463517 1.19251864\n",
+ " 1.03081028 1.06579009 1.0865027 0.97509951 1.08919383 1.07088622\n",
+ " 0.88604682 0.96118678 1.03694739 0.68591457 1.05546631 0.91181083\n",
+ " 0.98262121 0.94778371 1.11439945 0.97633352 0.964915 1.13867615\n",
+ " 1.1179955 1.10409814 0.98138727 1.05130809 0.96141391 0.86876938\n",
+ " 0.95252924 0.91652965 0.75280956 0.90630333 1.02831875 1.03302042\n",
+ " 0.97673868 1.10415458 1.0962827 1.06599447 0.91712704 1.17343477\n",
+ " 0.84102231 1.1039787 1.09676816 0.94735573 1.14905006 0.96544546\n",
+ " 1.01811515 1.14665696 0.83760722 0.97669814 0.7806451 1.06639228\n",
+ " 1.16535421 1.06304899 1.05594025 1.05733307 1.02019525 1.00608368\n",
+ " 1.00962811 1.03944518 1.02169401 1.08549306 0.82319006 1.01159241\n",
+ " 0.94157952 1.07250184 0.94161454 0.9720459 0.65854238 0.81315305\n",
+ " 1.07131082 0.97547404 0.90212735 1.04709614 0.85534682 1.06743073\n",
+ " 0.89346954 0.86326391 1.00706814 1.0897298 0.91436948 0.75487052\n",
+ " 0.75596688 0.94565841 0.95165233 0.93634702 0.96973108 0.83637775\n",
+ " 0.98326613 0.6558631 0.9933073 1.02725857 1.10585208 0.85823421\n",
+ " 1.21339282 1.06245269 1.17859525 0.86263995 1.14826524 1.07301848\n",
+ " 1.24015144 1.36251803 1.08949241 1.11080693 0.96488777 0.94466767\n",
+ " 1.04388683 1.17506031 0.94300428 0.91997956 0.98400407 0.99623632\n",
+ " 0.8633667 1.01946885 0.82632191 0.93506847 0.97591708 0.87672968\n",
+ " 0.71403487 0.97582197 1.0586424 1.02571066 0.86590501 1.1614363\n",
+ " 0.92650471 0.87893983 0.97773397 0.98611551 0.97761033 1.04815723\n",
+ " 0.83754575 0.76343419 0.67277256 0.99086506 1.01927109 1.10385477\n",
+ " 0.96588783 0.86824653]\n",
+ "Columns: Index(['Country', 'Histologic_Grade_FIGO', 'Histologic_type',\n",
+ " 'Path_Stage_Primary_Tumor-pT', 'Path_Stage_Reg_Lymph_Nodes-pN',\n",
+ " 'Clin_Stage_Dist_Mets-cM', 'Path_Stage_Dist_Mets-pM',\n",
+ " 'tumor_Stage-Pathological', 'FIGO_stage', 'BMI', 'Age', 'Race',\n",
+ " 'Ethnicity', 'Gender', 'Tumor_Site', 'Tumor_Focality', 'Tumor_Size_cm'],\n",
+ " dtype='object')\n",
+ "Topk matches: [{'source_column': 'BMI', 'top_k_columns': [ColumnScore(column_name='average_base_quality', score=0.7367687041662533), ColumnScore(column_name='bmi', score=0.7158004578456193), ColumnScore(column_name='body_surface_area', score=0.6174846726008038), ColumnScore(column_name='intermediate_dimension', score=0.6024270602089243), ColumnScore(column_name='recist_targeted_regions_sum', score=0.5769250770602694), ColumnScore(column_name='longest_dimension', score=0.4945747379045601), ColumnScore(column_name='percent_stromal_cells', score=0.4244262568444388), ColumnScore(column_name='sequencing_date', score=0.4244204325614378), ColumnScore(column_name='spindle_cell_percent', score=0.4189918216712454), ColumnScore(column_name='pmid', score=0.4125009713289247)]}]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " target | \n",
+ " similarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " BMI | \n",
+ " average_base_quality | \n",
+ " 0.736769 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " BMI | \n",
+ " bmi | \n",
+ " 0.715800 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " BMI | \n",
+ " body_surface_area | \n",
+ " 0.617485 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BMI | \n",
+ " intermediate_dimension | \n",
+ " 0.602427 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " BMI | \n",
+ " recist_targeted_regions_sum | \n",
+ " 0.576925 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " BMI | \n",
+ " longest_dimension | \n",
+ " 0.494575 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " BMI | \n",
+ " percent_stromal_cells | \n",
+ " 0.424426 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " BMI | \n",
+ " sequencing_date | \n",
+ " 0.424420 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " BMI | \n",
+ " spindle_cell_percent | \n",
+ " 0.418992 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " BMI | \n",
+ " pmid | \n",
+ " 0.412501 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source target similarity\n",
+ "0 BMI average_base_quality 0.736769\n",
+ "1 BMI bmi 0.715800\n",
+ "2 BMI body_surface_area 0.617485\n",
+ "3 BMI intermediate_dimension 0.602427\n",
+ "4 BMI recist_targeted_regions_sum 0.576925\n",
+ "5 BMI longest_dimension 0.494575\n",
+ "6 BMI percent_stromal_cells 0.424426\n",
+ "7 BMI sequencing_date 0.424420\n",
+ "8 BMI spindle_cell_percent 0.418992\n",
+ "9 BMI pmid 0.412501"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bdi.top_matches(dataset, columns=[\"BMI\"], target=\"gdc\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.top_matches(dataset, columns=[\"BMI\"], target=\"gdc\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.match_schema(dataset[[\"BMI\"]], target=\"gdc\", method=\"ct_learning\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.top_matches(dataset, columns=[\"Histologic_type\"], target=\"gdc\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.top_matches(dataset, columns=[\"Histologic_type\"], target=\"gdc\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.match_schema(dataset[[\"FIGO_stage\", \"BMI\"]], target=\"gdc\", method=\"ct_learning\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "column_mappings = bdi.match_schema(dataset[columns], target=\"gdc\", method=\"two_phase\")\n",
+ "column_mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generating a harmonized table\n",
+ "\n",
+ "After discovering a schema mapping, we can generate a new table (DataFrame) using the new column names from the GDC standard vocabulary.\n",
+ "\n",
+ "To do so using `bdi-kit`, we can use the function `materialize_mapping()` as follows. Note that the column headers have been renamed to the target schema."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.materialize_mapping(dataset, column_mappings)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generating a harmonized table with value mappings\n",
+ "\n",
+ "`bdi-kit` can also help with translation of the values from the source table to the target standard format.\n",
+ "\n",
+ "To this end, `bdi-kit` provides the function `match_values()` that automatically creates value mappings for each string column.\n",
+ "The output of `match_values()` can be fed to `materialize_mapping()`, which materializes the final target table using both schema and value mappings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# JF: why do we have so many \"None\" in dysplasia_type?\n",
+ "# and why do we have fewer columns -- the previous dataframe has 10 attributes\n",
+ "# and this has 7\n",
+ "value_mappings = bdi.match_values(dataset, column_mapping=column_mappings, target=\"gdc\", method=\"tfidf\")\n",
+ "bdi.materialize_mapping(dataset, value_mappings)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Verifying the schema mappings\n",
+ "\n",
+ "Sometimes the mappings generated automatically may be incorrect or you may want to verify them individually.\n",
+ "To verify the suggested column mappings, `bdi-kit` offers additional APIs to visualize the data and make any modifications when necessary. \n",
+ "\n",
+ "For this example, we will use the column `Histologic_type`. We can start by exploring the columns most similar to `Histologic_type`. \n",
+ "\n",
+ "For this, we can use the `top_matches()` function. Here, we notice that `primary_diagnosis` could be a potential target column.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hist_type_matches = bdi.top_matches(dataset, columns=[\"Histologic_type\"], sample=True, attrib_desc=True, target=\"gdc\")\n",
+ "hist_type_matches"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Viewing the column domains\n",
+ "\n",
+ "To verify that `primary_diagnosis` is a good target column, we view and compare the domains of each column using the `preview_domain()` function. For the source table, it returns the list of unique values in the source column. For the GDC target, it returns the list of unique valid values that a column can have.\n",
+ "\n",
+ "Here we see that the values seem to be related."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.preview_domain(dataset, \"Histologic_type\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "bdi.preview_domain(\"gdc\", \"primary_diagnosis\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### JF: actually, I don't see why this is a correct match, maybe we should have another function that looks for probable matches -- given the values for the first column, return a sample of the second column with similar values. \n",
+ "Since `primary_diagnosis` looks like a correct match for `Histologic_type`, we can modify the `column_mappings` variable directly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "column_mappings.loc[column_mappings[\"source\"] == \"Histologic_type\", \"target\"] = \"primary_diagnosis\"\n",
+ "column_mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Finding correct value mappings\n",
+ "\n",
+ "After finding the correct column, we need to find appropriate value mappings. \n",
+ "Using `preview_value_mappings()`, we can inspect what the possible value mappings for this would look like after the harmonization.\n",
+ "\n",
+ "`bdi-kit` implements multiple methods for value mapping discovery, including:\n",
+ "\n",
+ " - `edit_distance` - Computes value similarities using Levenshtein's edit distance measure.\n",
+ " - `tfidf` - A method based on tf-idf importance weighting computed over character n-grams.\n",
+ " - `embedding` - Uses BERT word embeddings to compute \"semantic similarity\" between the values.\n",
+ "\n",
+ "To specify a value mapping approach, we can pass the `method` parameter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.preview_value_mappings(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"edit_distance\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.preview_value_mappings(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"tfidf\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.preview_value_mappings(\n",
+ " dataset, column_mapping=(\"Histologic_type\", \"primary_diagnosis\"), target=\"gdc\", method=\"embedding\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# JF add context - -manual map\n",
+ "hist_type_vmap = pd.DataFrame(\n",
+ " columns=[\"source\", \"target\"],\n",
+ " data=[\n",
+ " (\"Carcinosarcoma\", \"Carcinosarcoma, NOS\"),\n",
+ " (\"Clear cell\", \"Clear cell adenocarcinoma, NOS\"),\n",
+ " (\"Endometrioid\", \"Endometrioid carcinoma\"),\n",
+ " (\"Serous\", \"Serous cystadenocarcinoma\"),\n",
+ " ],\n",
+ ")\n",
+ "hist_type_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Verifying multiple value mappings at once\n",
+ "\n",
+ "Besides verifying value mappings individually, you can also do it for all column mappings at once."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mappings = bdi.preview_value_mappings(\n",
+ " dataset,\n",
+ " column_mapping=column_mappings,\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "\n",
+ "for mapping in mappings:\n",
+ " print(f\"{mapping['source']} => {mapping['target']}\")\n",
+ " display(mapping[\"mapping\"])\n",
+ " print(\"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Fixing remaining value mappings\n",
+ "\n",
+ "We need to fix a few value mappings:\n",
+ "- Race\n",
+ "- Ethnicity\n",
+ "- Tumor_Site\n",
+ "\n",
+ "For `Race`, we need to fix: `nan` -> `american indian or alaska native`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "race_vmap = bdi.preview_value_mappings(\n",
+ " dataset,\n",
+ " column_mapping=(\"Race\", \"race\"),\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "race_vmap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "race_vmap = race_vmap[race_vmap[\"similarity\"] >= 1.0]\n",
+ "race_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For `Ethnicity`, we need to fix: `Not reported` -> `not hispanic or latino`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ethinicity_vmap = bdi.preview_value_mappings(\n",
+ " dataset,\n",
+ " column_mapping=(\"Ethnicity\", \"ethnicity\"),\n",
+ " target=\"gdc\",\n",
+ " method=\"tfidf\",\n",
+ ")\n",
+ "ethinicity_vmap\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ethinicity_vmap = ethinicity_vmap[ethinicity_vmap[\"similarity\"] > 0.9]\n",
+ "ethinicity_vmap"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For `Tumor_Site`, given that this dataset is about endometrial cancer, all values must be mapped to \"Endometrium\". So instead of fixing each mapping individually, we will write a custom function that returns \"Endometrium\" regardless of the input value. Later, we will show how to use this function to transform the dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bdi.preview_value_mappings(\n",
+ " dataset, column_mapping=(\"Tumor_Site\", \"tissue_or_organ_of_origin\"), target=\"gdc\", method=\"tfidf\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Custom mapping function that will be used to map the values of the 'Tumor_Site' column\n",
+ "# JF: why does this always return Endometrium?\n",
+ "def map_tumor_site(source_value):\n",
+ " return \"Endometrium\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Combining custom user mappings with suggested mappings\n",
+ "\n",
+ "Before generating a final harmonized dataset, we can combine the automatically generated value mappings with the fixed mappings provided by the user. To do so, we use the `bdi.update_mappings()` function, which takes a list of mappings (e.g., generated automatically) and a list of \"user-defined mapping overrides\" that will be combined with the first list of mappings and will take precedence whenever they conflict.\n",
+ "\n",
+ "In our example below, all mappings specified in the variable `user_mappings` will override the mappings in `value_mappings` generated by the `bdi.match_values()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from math import ceil\n",
+ "\n",
+ "user_mappings = [\n",
+ " {\n",
+ " \"source\": \"Tumor_Site\",\n",
+ " \"target\": \"tissue_or_organ_of_origin\",\n",
+ " \"mapper\": map_tumor_site,\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"BMI\",\n",
+ " \"target\": \"bmi\",\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"Age\",\n",
+ " \"target\": \"days_to_birth\",\n",
+ " \"mapper\": lambda age: -age * 365.25,\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"Age\",\n",
+ " \"target\": \"age_at_diagnosis\",\n",
+ " \"mapper\": lambda age: float(\"nan\") if pd.isnull(age) else ceil(age*365.25),\n",
+ " },\n",
+ " {\n",
+ " \"source\": \"Tumor_Size_cm\",\n",
+ " \"target\": \"tumor_largest_dimension_diameter\",\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "value_mappings = bdi.match_values(\n",
+ " dataset, target=\"gdc\", column_mapping=column_mappings, method=\"tfidf\"\n",
+ ")\n",
+ "\n",
+ "harmonization_spec = bdi.update_mappings(value_mappings, user_mappings)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we generate the harmonized dataset, with the user-defined value mappings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# JF: are there still incorrect matches and mappings?\n",
+ "harmonized_dataset = bdi.materialize_mapping(dataset, harmonization_spec)\n",
+ "harmonized_dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For comparison, here is what our original data looked like:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Unique source columns in mapping order; a source column (e.g. `Age`) may feed several targets.\n",
+ "original_columns = list(dict.fromkeys(m[\"source\"] for m in harmonization_spec))\n",
+ "dataset[original_columns]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}