diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 46fed25d76094..f9f1922595248 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -115,14 +115,14 @@ PRIMARY KEY (aid,dic) load stats 's/explain_complex_stats_rr.json'; explain format = 'brief' SELECT ds, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(dic) as install_device FROM dt use index (cm) WHERE (ds >= '2016-09-01') AND (ds <= '2016-11-03') AND (cm IN ('1062', '1086', '1423', '1424', '1425', '1426', '1427', '1428', '1429', '1430', '1431', '1432', '1433', '1434', '1435', '1436', '1437', '1438', '1439', '1440', '1441', '1442', '1443', '1444', '1445', '1446', '1447', '1448', '1449', '1450', '1451', '1452', '1488', '1489', '1490', '1491', '1492', '1493', '1494', '1495', '1496', '1497', '1550', '1551', '1552', '1553', '1554', '1555', '1556', '1557', '1558', '1559', '1597', '1598', '1599', '1600', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1609', '1610', '1611', '1612', '1613', '1614', '1615', '1616', '1623', '1624', '1625', '1626', '1627', '1628', '1629', '1630', '1631', '1632', '1709', '1719', '1720', '1843', '2813', '2814', '2815', '2816', '2817', '2818', '2819', '2820', '2821', '2822', '2823', '2824', '2825', '2826', '2827', '2828', '2829', '2830', '2831', '2832', '2833', '2834', '2835', '2836', '2837', '2838', '2839', '2840', '2841', '2842', '2843', '2844', '2845', '2846', '2847', '2848', '2849', '2850', '2851', '2852', '2853', '2854', '2855', '2856', '2857', '2858', '2859', '2860', '2861', '2862', '2863', '2864', '2865', '2866', '2867', '2868', '2869', '2870', '2871', '2872', '3139', '3140', '3141', '3142', '3143', '3144', '3145', '3146', '3147', '3148', '3149', '3150', '3151', '3152', '3153', '3154', '3155', '3156', '3157', '3158', '3386', '3387', '3388', '3389', '3390', '3391', '3392', '3393', '3394', '3395', '3664', '3665', '3666', '3667', '3668', '3670', '3671', '3672', '3673', '3674', '3676', '3677', '3678', '3679', '3680', '3681', '3682', '3683', '3684', '3685', '3686', '3687', '3688', '3689', '3690', '3691', '3692', '3693', '3694', '3695', '3696', '3697', '3698', '3699', '3700', '3701', '3702', '3703', '3704', '3705', '3706', '3707', '3708', '3709', '3710', '3711', '3712', '3713', '3714', '3715', '3960', '3961', '3962', '3963', '3964', '3965', '3966', '3967', '3968', '3978', '3979', '3980', '3981', '3982', '3983', '3984', '3985', '3986', '3987', '4208', '4209', '4210', '4211', '4212', '4304', '4305', '4306', '4307', '4308', '4866', '4867', '4868', '4869', '4870', '4871', '4872', '4873', '4874', '4875')) GROUP BY ds, p1, p2, p3, p4, p5, p6_md5, p7_md5 ORDER BY ds2 DESC; id estRows task access object operator info -Projection 21.53 root test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21 -└─Sort 21.53 root test.dt.ds2:desc - └─HashAgg 21.53 root group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5 - └─IndexLookUp 21.53 root - ├─IndexRangeScan(Build) 128.32 cop[tikv] table:dt, index:cm(cm) range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false - └─HashAgg(Probe) 21.53 cop[tikv] group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34 - └─Selection 21.56 cop[tikv] ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000) - └─TableRowIDScan 128.32 cop[tikv] table:dt keep order:false +Projection 21.47 root test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21 +└─Sort 21.47 root test.dt.ds2:desc + └─HashAgg 21.47 root group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5 + └─IndexLookUp 21.47 root + ├─IndexRangeScan(Build) 128.00 cop[tikv] table:dt, index:cm(cm) range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false + └─HashAgg(Probe) 21.47 cop[tikv] group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34 + └─Selection 21.50 cop[tikv] ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000) + └─TableRowIDScan 128.00 cop[tikv] table:dt keep order:false explain format = 'brief' select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext, gad.t as gtime from st gad join (select id, aid, pt, dic, ip, t from dd where pt = 'android' and bm = 0 and t > 1478143908) sdk on gad.aid = sdk.aid and gad.ip = sdk.ip and sdk.t > gad.t where gad.t > 1478143908 and gad.bm = 0 and gad.pt = 'android' group by gad.aid, sdk.dic limit 2500; id estRows task access object operator info Projection 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t @@ -132,8 +132,8 @@ Projection 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd ├─TableReader(Build) 424.00 root data:Selection │ └─Selection 424.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.pt, "android"), gt(test.st.t, 1478143908), not(isnull(test.st.ip)) │ └─TableRangeScan 1999.00 cop[tikv] table:gad range:[0,+inf], keep order:false - └─TableReader(Probe) 455.80 root data:Selection - └─Selection 455.80 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) + └─TableReader(Probe) 450.56 root data:Selection + └─Selection 450.56 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) └─TableRangeScan 2000.00 cop[tikv] table:dd range:[0,+inf], keep order:false explain format = 'brief' select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000; id estRows task access object operator info @@ -169,11 +169,11 @@ Projection 428.32 root test.dt.id, test.dt.aid, test.dt.pt, test.dt.dic, test.d └─TableRowIDScan 1.00 cop[tikv] table:rr keep order:false explain format = 'brief' select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr; id estRows task access object operator info -Projection 207.86 root test.pp.pc, test.pp.cr, Column#22, Column#23, Column#24 -└─HashAgg 207.86 root group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid)->Column#22, funcs:count(test.pp.oid)->Column#23, funcs:sum(test.pp.am)->Column#24, funcs:firstrow(test.pp.pc)->test.pp.pc, funcs:firstrow(test.pp.cr)->test.pp.cr - └─IndexLookUp 207.86 root +Projection 207.02 root test.pp.pc, test.pp.cr, Column#22, Column#23, Column#24 +└─HashAgg 207.02 root group by:test.pp.cr, test.pp.pc, funcs:count(distinct test.pp.uid)->Column#22, funcs:count(test.pp.oid)->Column#23, funcs:sum(test.pp.am)->Column#24, funcs:firstrow(test.pp.pc)->test.pp.pc, funcs:firstrow(test.pp.cr)->test.pp.cr + └─IndexLookUp 207.02 root ├─IndexRangeScan(Build) 627.00 cop[tikv] table:pp, index:ps(ps) range:[2,2], keep order:false - └─Selection(Probe) 207.86 cop[tikv] ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200) + └─Selection(Probe) 207.02 cop[tikv] ge(test.pp.ppt, 1478188800), in(test.pp.pi, 510017, 520017), in(test.pp.uid, 18089709, 18090780), lt(test.pp.ppt, 1478275200) └─TableRowIDScan 627.00 cop[tikv] table:pp keep order:false drop table if exists tbl_001; CREATE TABLE tbl_001 (a int, b int); diff --git a/planner/core/testdata/analyze_suite_out.json b/planner/core/testdata/analyze_suite_out.json index cb0dd2137c515..ac434e5d18f25 100644 --- a/planner/core/testdata/analyze_suite_out.json +++ b/planner/core/testdata/analyze_suite_out.json @@ -60,8 +60,8 @@ "SQL": "explain format = 'brief' select * from t where a <= 5 and b <= 5", "RatioOfPseudoEstimate": 10, "Plan": [ - "TableReader 29.77 root data:Selection", - "└─Selection 29.77 cop[tikv] le(test.t.a, 5), le(test.t.b, 5)", + "TableReader 28.80 root data:Selection", + "└─Selection 28.80 cop[tikv] le(test.t.a, 5), le(test.t.b, 5)", " └─TableFullScan 80.00 cop[tikv] table:t keep order:false" ] }, @@ -454,18 +454,18 @@ { "SQL": "explain format = 'brief' select * from t where a = 7639902", "Plan": [ - "IndexReader 6.68 root index:IndexRangeScan", - "└─IndexRangeScan 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" + "IndexReader 5.95 root index:IndexRangeScan", + "└─IndexRangeScan 5.95 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" ] }, { "SQL": "explain format = 'brief' select c, b from t where a = 7639902 order by b asc limit 6", "Plan": [ - "Projection 6.00 root test.t.c, test.t.b", - "└─TopN 6.00 root test.t.b, offset:0, count:6", - " └─IndexReader 6.00 root index:TopN", - " └─TopN 6.00 cop[tikv] test.t.b, offset:0, count:6", - " └─IndexRangeScan 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" + "Projection 5.95 root test.t.c, test.t.b", + "└─TopN 5.95 root test.t.b, offset:0, count:6", + " └─IndexReader 5.95 root index:TopN", + " └─TopN 5.95 cop[tikv] test.t.b, offset:0, count:6", + " └─IndexRangeScan 5.95 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" ] } ] diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 07d90434a6cc7..80a72b77b8c63 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -530,14 +530,6 @@ func (c *TopN) Num() int { return len(c.TopN) } -// outOfRange checks whether the the given value falls back in [TopN.LowestOne, TopN.HighestOne]. -func (c *TopN) outOfRange(val []byte) bool { - if c == nil || len(c.TopN) == 0 { - return true - } - return bytes.Compare(c.TopN[0].Encoded, val) > 0 || bytes.Compare(val, c.TopN[c.Num()-1].Encoded) > 0 -} - // DecodedString returns the value with decoded result. func (c *TopN) DecodedString(ctx sessionctx.Context, colTypes []byte) (string, error) { builder := &strings.Builder{} @@ -775,7 +767,7 @@ func MergePartTopN2GlobalTopN(sc *stmtctx.StatementContext, version int, topNs [ datum = d } // Get the row count which the value is equal to the encodedVal from histogram. - count := hists[j].equalRowCount(datum, isIndex) + count, _ := hists[j].equalRowCount(datum, isIndex) if count != 0 { counter[encodedVal] += count // Remove the value corresponding to encodedVal from the histogram. diff --git a/statistics/handle/update.go b/statistics/handle/update.go index 5580930f99b78..6038a1c0213a3 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -1254,12 +1254,10 @@ func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error { expected := 0.0 if isIndex { idx := t.Indices[id] - expected, err = idx.GetRowCount(sc, nil, ranges, t.ModifyCount) - expected *= idx.GetIncreaseFactor(t.Count) + expected, err = idx.GetRowCount(sc, nil, ranges, t.Count) } else { c := t.Columns[id] - expected, err = c.GetColumnRowCount(sc, ranges, t.ModifyCount, true) - expected *= c.GetIncreaseFactor(t.Count) + expected, err = c.GetColumnRowCount(sc, ranges, t.Count, true) } q.Expected = int64(expected) return err diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index 9b0438ab2f655..39dcdfefb299e 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -1815,8 +1815,8 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) { sql: "select * from t where a = 2 and b > 10", hist: "column:2 ndv:20 totColSize:20\n" + "num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0 ndv: 0\n" + - "num: 4 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" + - "num: 5 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0 ndv: 0", + "num: 6 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0 ndv: 0", rangeID: tblInfo.Columns[1].ID, idxID: tblInfo.Indices[0].ID, eqCount: 3, diff --git a/statistics/histogram.go b/statistics/histogram.go index 614ede5e0b265..7b2c7610b4b7d 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -425,35 +425,37 @@ func (hg *Histogram) ToString(idxCols int) string { } // equalRowCount estimates the row count where the column equals to value. -func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) float64 { +// matched: return true if this returned row count is from Bucket.Repeat or bucket NDV, which is more accurate than if not. +func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) (count float64, matched bool) { index, match := hg.Bounds.LowerBound(0, &value) // Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound. if index%2 == 1 { if match { - return float64(hg.Buckets[index/2].Repeat) + return float64(hg.Buckets[index/2].Repeat), true } if hasBucketNDV && hg.Buckets[index/2].NDV > 1 { - return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1) + return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1), true } - return hg.notNullCount() / float64(hg.NDV) + return hg.notNullCount() / float64(hg.NDV), false } if match { cmp := chunk.GetCompareFunc(hg.Tp) if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 { - return float64(hg.Buckets[index/2].Repeat) + return float64(hg.Buckets[index/2].Repeat), true } if hasBucketNDV && hg.Buckets[index/2].NDV > 1 { - return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1) + return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1), true } - return hg.notNullCount() / float64(hg.NDV) + return hg.notNullCount() / float64(hg.NDV), false } - return 0 + return 0, false } // greaterRowCount estimates the row count where the column greater than value. // It's deprecated. Only used for test. func (hg *Histogram) greaterRowCount(value types.Datum) float64 { - gtCount := hg.notNullCount() - hg.lessRowCount(value) - hg.equalRowCount(value, false) + histRowCount, _ := hg.equalRowCount(value, false) + gtCount := hg.notNullCount() - hg.lessRowCount(value) - histRowCount return math.Max(0, gtCount) } @@ -549,12 +551,12 @@ func (hg *Histogram) mergeBuckets(bucketIdx int) { } // GetIncreaseFactor get the increase factor to adjust the final estimated count when the table is modified. -func (idx *Index) GetIncreaseFactor(totalCount int64) float64 { +func (idx *Index) GetIncreaseFactor(realtimeRowCount int64) float64 { columnCount := idx.TotalRowCount() if columnCount == 0 { return 1.0 } - return float64(totalCount) / columnCount + return float64(realtimeRowCount) / columnCount } // BetweenRowCount estimates the row count for interval [l, r). @@ -826,12 +828,120 @@ func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64 { func (hg *Histogram) outOfRange(val types.Datum) bool { if hg.Len() == 0 { - return true + return false } return chunk.Compare(hg.Bounds.GetRow(0), 0, &val) > 0 || chunk.Compare(hg.Bounds.GetRow(hg.Bounds.NumRows()-1), 0, &val) < 0 } +// outOfRangeRowCount estimate the row count of part of [lDatum, rDatum] which is out of range of the histogram. +// Here we assume the density of data is decreasing from the lower/upper bound of the histogram toward outside. +// The maximum row count it can get is the increaseCount. It reaches the maximum when out-of-range width reaches histogram range width. +// As it shows below. To calculate the out-of-range row count, we need to calculate the percentage of the shaded area. +// Note that we assume histL-boundL == histR-histL == boundR-histR here. +// +// /│ │\ +// / │ │ \ +// /x│ │◄─histogram─►│ \ +// / xx│ │ range │ \ +// / │xxx│ │ │ \ +// / │xxx│ │ │ \ +//────┴────┴───┴──┴─────────────┴───────────┴───── +// ▲ ▲ ▲ ▲ ▲ ▲ +// │ │ │ │ │ │ +// boundL │ │histL histR boundR +// │ │ +// lDatum rDatum +func (hg *Histogram) outOfRangeRowCount(lDatum, rDatum *types.Datum, increaseCount int64) float64 { + if hg.Len() == 0 { + return 0 + } + + // For bytes and string type, we need to cut the common prefix when converting them to scalar value. + // Here we calculate the length of common prefix. + commonPrefix := 0 + if hg.GetLower(0).Kind() == types.KindBytes || hg.GetLower(0).Kind() == types.KindString { + // Calculate the common prefix length among the lower and upper bound of histogram and the range we want to estimate. + commonPrefix = commonPrefixLength(hg.GetLower(0).GetBytes(), + hg.GetUpper(hg.Len()-1).GetBytes(), + lDatum.GetBytes(), + rDatum.GetBytes()) + } + + // Convert the range we want to estimate to scalar value(float64) + l := convertDatumToScalar(lDatum, commonPrefix) + r := convertDatumToScalar(rDatum, commonPrefix) + // If this is an unsigned column, we need to make sure values are not negative. + // Normal negative value should have become 0. But this still might happen when met MinNotNull here. + // Maybe it's better to do this transformation in the ranger like the normal negative value. + if mysql.HasUnsignedFlag(hg.Tp.Flag) { + if l < 0 { + l = 0 + } + if r < 0 { + r = 0 + } + } + + // make sure l < r + if l >= r { + return 0 + } + // Convert the lower and upper bound of the histogram to scalar value(float64) + histL := convertDatumToScalar(hg.GetLower(0), commonPrefix) + histR := convertDatumToScalar(hg.GetUpper(hg.Len()-1), commonPrefix) + histWidth := histR - histL + if histWidth <= 0 { + return 0 + } + boundL := histL - histWidth + boundR := histR + histWidth + + leftPercent := float64(0) + rightPercent := float64(0) + + // keep l and r unchanged, use actualL and actualR to calculate. + actualL := l + actualR := r + // If the range overlaps with (boundL,histL), we need to handle the out-of-range part on the left of the histogram range + if actualL < histL && actualR > boundL { + // make sure boundL <= actualL < actualR <= histL + if actualL < boundL { + actualL = boundL + } + if actualR > histL { + actualR = histL + } + // Calculate the percentage of "the shaded area" on the left side. + leftPercent = (math.Pow(actualR-boundL, 2) - math.Pow(actualL-boundL, 2)) / math.Pow(histWidth, 2) + } + + actualL = l + actualR = r + // If the range overlaps with (histR,boundR), we need to handle the out-of-range part on the right of the histogram range + if actualL < boundR && actualR > histR { + // make sure histR <= actualL < actualR <= boundR + if actualL < histR { + actualL = histR + } + if actualR > boundR { + actualR = boundR + } + // Calculate the percentage of "the shaded area" on the right side. + rightPercent = (math.Pow(boundR-actualL, 2) - math.Pow(boundR-actualR, 2)) / math.Pow(histWidth, 2) + } + + totalPercent := leftPercent*0.5 + rightPercent*0.5 + if totalPercent > 1 { + totalPercent = 1 + } + rowCount := totalPercent * hg.notNullCount() + if rowCount > float64(increaseCount) { + return float64(increaseCount) + } + return rowCount +} + // Copy deep copies the histogram. func (hg *Histogram) Copy() *Histogram { newHist := *hg @@ -928,13 +1038,13 @@ func (c *Column) notNullCount() float64 { } // GetIncreaseFactor get the increase factor to adjust the final estimated count when the table is modified. -func (c *Column) GetIncreaseFactor(totalCount int64) float64 { +func (c *Column) GetIncreaseFactor(realtimeRowCount int64) float64 { columnCount := c.TotalRowCount() if columnCount == 0 { // avoid dividing by 0 return 1.0 } - return float64(totalCount) / columnCount + return float64(realtimeRowCount) / columnCount } // MemoryUsage returns the total memory usage of Histogram and CMSketch in Column. @@ -967,7 +1077,7 @@ func (c *Column) IsInvalid(sc *stmtctx.StatementContext, collPseudo bool) bool { return c.TotalRowCount() == 0 || (c.Histogram.NDV > 0 && c.notNullCount() == 0) } -func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, encodedVal []byte, modifyCount int64) (float64, error) { +func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, encodedVal []byte, realtimeRowCount int64) (float64, error) { if val.IsNull() { return float64(c.NullCount), nil } @@ -976,23 +1086,22 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, en if c.Histogram.Bounds.NumRows() == 0 { return 0.0, nil } - if c.Histogram.NDV > 0 && c.outOfRange(val, encodedVal) { - return outOfRangeEQSelectivity(c.Histogram.NDV, modifyCount, int64(c.TotalRowCount())) * c.TotalRowCount(), nil + if c.Histogram.NDV > 0 && c.outOfRange(val) { + return outOfRangeEQSelectivity(c.Histogram.NDV, realtimeRowCount, int64(c.TotalRowCount())) * c.TotalRowCount(), nil } if c.CMSketch != nil { count, err := queryValue(sc, c.CMSketch, c.TopN, val) return float64(count), errors.Trace(err) } - return c.Histogram.equalRowCount(val, false), nil + histRowCount, _ := c.Histogram.equalRowCount(val, false) + return histRowCount, nil } + + // Stats version == 2 // All the values are null. if c.Histogram.Bounds.NumRows() == 0 && c.TopN.Num() == 0 { return 0, nil } - if c.Histogram.NDV+int64(c.TopN.Num()) > 0 && c.outOfRange(val, encodedVal) { - return outOfRangeEQSelectivity(c.Histogram.NDV, modifyCount, int64(c.TotalRowCount())) * c.TotalRowCount(), nil - } - // Stats version == 2 // 1. try to find this value in TopN if c.TopN != nil { rowcount, ok := c.QueryTopN(encodedVal) @@ -1000,38 +1109,21 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, en return float64(rowcount), nil } } - // 2. try to find this value in bucket.repeats(the last value in every bucket) - index, match := c.Histogram.Bounds.LowerBound(0, &val) - if index%2 == 1 && match { - return float64(c.Histogram.Buckets[index/2].Repeat), nil - } - if match { - cmp := chunk.GetCompareFunc(c.Histogram.Tp) - if cmp(c.Histogram.Bounds.GetRow(index), 0, c.Histogram.Bounds.GetRow(index+1), 0) == 0 { - return float64(c.Histogram.Buckets[index/2].Repeat), nil - } - } - // 3. use uniform distribution assumption for the rest - cnt := c.Histogram.notNullCount() - for _, bkt := range c.Histogram.Buckets { - if cnt <= float64(bkt.Repeat) { - return 0, nil - } - cnt -= float64(bkt.Repeat) - } - topNLen := int64(0) - if c.TopN != nil { - topNLen = int64(len(c.TopN.TopN)) + // 2. try to find this value in bucket.Repeat(the last value in every bucket) + histCnt, matched := c.Histogram.equalRowCount(val, true) + if matched { + return histCnt, nil } - ndv := c.Histogram.NDV - topNLen - int64(len(c.Histogram.Buckets)) - if ndv <= 0 { + // 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats) + histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num())) + if histNDV <= 0 { return 0, nil } - return cnt / float64(ndv), nil + return c.Histogram.notNullCount() / histNDV, nil } // GetColumnRowCount estimates the row count by a slice of Range. -func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, pkIsHandle bool) (float64, error) { +func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, realtimeRowCount int64, pkIsHandle bool) (float64, error) { var rowCount float64 for _, rg := range ranges { highVal := *rg.HighVal[0].Clone() @@ -1055,7 +1147,7 @@ func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*range return 0, err } if cmp == 0 { - // the point case. + // case 1: it's a point if !rg.LowExclude && !rg.HighExclude { // In this case, the row count is at most 1. if pkIsHandle { @@ -1063,10 +1155,12 @@ func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*range continue } var cnt float64 - cnt, err = c.equalRowCount(sc, lowVal, lowEncoded, modifyCount) + cnt, err = c.equalRowCount(sc, lowVal, lowEncoded, realtimeRowCount) if err != nil { return 0, errors.Trace(err) } + // If the current table row count has changed, we should scale the row count accordingly. + cnt *= c.GetIncreaseFactor(realtimeRowCount) rowCount += cnt } continue @@ -1075,28 +1169,30 @@ func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*range // So for the small range, we convert it to points. if c.StatsVer < 2 { rangeVals := enumRangeValues(lowVal, highVal, rg.LowExclude, rg.HighExclude) - // The small range case. + + // case 2: it's a small range && using ver1 stats if rangeVals != nil { for _, val := range rangeVals { - cnt, err := c.equalRowCount(sc, val, lowEncoded, modifyCount) + cnt, err := c.equalRowCount(sc, val, lowEncoded, realtimeRowCount) if err != nil { return 0, err } + // If the current table row count has changed, we should scale the row count accordingly. + cnt *= c.GetIncreaseFactor(realtimeRowCount) rowCount += cnt } + continue } } - // The interval case. + + // case 3: it's an interval cnt := c.BetweenRowCount(sc, lowVal, highVal, lowEncoded, highEncoded) - if (c.outOfRange(lowVal, lowEncoded) && !lowVal.IsNull()) || c.outOfRange(highVal, highEncoded) { - cnt += outOfRangeEQSelectivity(outOfRangeBetweenRate, modifyCount, int64(c.TotalRowCount())) * c.TotalRowCount() - } - // `betweenRowCount` returns count for [l, h) range, we adjust cnt for boudaries here. + // `betweenRowCount` returns count for [l, h) range, we adjust cnt for boundaries here. // Note that, `cnt` does not include null values, we need specially handle cases // where null is the lower bound. if rg.LowExclude && !lowVal.IsNull() { - lowCnt, err := c.equalRowCount(sc, lowVal, lowEncoded, modifyCount) + lowCnt, err := c.equalRowCount(sc, lowVal, lowEncoded, realtimeRowCount) if err != nil { return 0, errors.Trace(err) } @@ -1106,31 +1202,41 @@ func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*range cnt += float64(c.NullCount) } if !rg.HighExclude { - highCnt, err := c.equalRowCount(sc, highVal, highEncoded, modifyCount) + highCnt, err := c.equalRowCount(sc, highVal, highEncoded, realtimeRowCount) if err != nil { return 0, errors.Trace(err) } cnt += highCnt } + + if cnt > c.TotalRowCount() { + cnt = c.TotalRowCount() + } else if cnt < 0 { + cnt = 0 + } + + // If the current table row count has changed, we should scale the row count accordingly. + cnt *= c.GetIncreaseFactor(realtimeRowCount) + + // handling the out-of-range part + if (c.outOfRange(lowVal) && !lowVal.IsNull()) || c.outOfRange(highVal) { + increaseCount := realtimeRowCount - int64(c.TotalRowCount()) + if increaseCount < 0 { + increaseCount = 0 + } + cnt += c.Histogram.outOfRangeRowCount(&lowVal, &highVal, increaseCount) + } + rowCount += cnt } - if rowCount > c.TotalRowCount() { - rowCount = c.TotalRowCount() + if rowCount > float64(realtimeRowCount) { + rowCount = float64(realtimeRowCount) } else if rowCount < 0 { rowCount = 0 } return rowCount, nil } -func (c *Column) outOfRange(val types.Datum, encodedVal []byte) bool { - outOfHist := c.Histogram.outOfRange(val) - if !outOfHist { - return false - } - // Already out of hist. - return c.TopN.outOfRange(encodedVal) -} - // Index represents an index histogram. type Index struct { Histogram @@ -1173,28 +1279,40 @@ func (idx *Index) MemoryUsage() (sum int64) { var nullKeyBytes, _ = codec.EncodeKey(nil, nil, types.NewDatum(nil)) -func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 { +func (idx *Index) equalRowCount(b []byte, realtimeRowCount int64) float64 { if len(idx.Info.Columns) == 1 { if bytes.Equal(b, nullKeyBytes) { return float64(idx.NullCount) } } val := types.NewBytesDatum(b) - if idx.NDV > 0 && idx.outOfRange(val) { - return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount() - } - if idx.CMSketch != nil && idx.StatsVer < Version2 { - return float64(idx.QueryBytes(b)) - } - // If it's version2, query the top-n first. - if idx.StatsVer >= Version2 { - count, found := idx.TopN.QueryTopN(b) - if found { - return float64(count) + if idx.StatsVer < Version2 { + if idx.NDV > 0 && idx.outOfRange(val) { + return outOfRangeEQSelectivity(idx.NDV, realtimeRowCount, int64(idx.TotalRowCount())) * idx.TotalRowCount() } - return idx.Histogram.equalRowCount(val, true) + if idx.CMSketch != nil { + return float64(idx.QueryBytes(b)) + } + histRowCount, _ := idx.Histogram.equalRowCount(val, false) + return histRowCount + } + // stats version == 2 + // 1. try to find this value in TopN + count, found := idx.TopN.QueryTopN(b) + if found { + return float64(count) + } + // 2. try to find this value in bucket.Repeat(the last value in every bucket) + histCnt, matched := idx.Histogram.equalRowCount(val, true) + if matched { + return histCnt + } + // 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats) + histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num())) + if histNDV <= 0 { + return 0 } - return idx.Histogram.equalRowCount(val, false) + return idx.Histogram.notNullCount() / histNDV } // QueryBytes is used to query the count of specified bytes. @@ -1208,7 +1326,7 @@ func (idx *Index) QueryBytes(d []byte) uint64 { // GetRowCount returns the row count of the given ranges. // It uses the modifyCount to adjust the influence of modifications on the table. -func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, indexRanges []*ranger.Range, modifyCount int64) (float64, error) { +func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, indexRanges []*ranger.Range, realtimeRowCount int64) (float64, error) { totalCount := float64(0) isSingleCol := len(idx.Info.Columns) == 1 for _, indexRange := range indexRanges { @@ -1222,6 +1340,7 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde } fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns) if bytes.Equal(lb, rb) { + // case 1: it's a point if indexRange.LowExclude || indexRange.HighExclude { continue } @@ -1231,11 +1350,15 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde totalCount += 1 continue } - count := idx.equalRowCount(lb, modifyCount) + count := idx.equalRowCount(lb, realtimeRowCount) + // If the current table row count has changed, we should scale the row count accordingly. + count *= idx.GetIncreaseFactor(realtimeRowCount) totalCount += count continue } } + + // case 2: it's an interval // The final interval is [low, high) if indexRange.LowExclude { lb = kv.Key(lb).PrefixNext() @@ -1246,9 +1369,6 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde l := types.NewBytesDatum(lb) r := types.NewBytesDatum(rb) lowIsNull := bytes.Equal(lb, nullKeyBytes) - if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) { - totalCount += outOfRangeEQSelectivity(outOfRangeBetweenRate, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount() - } if isSingleCol && lowIsNull { totalCount += float64(idx.NullCount) } @@ -1268,9 +1388,21 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, coll *HistColl, inde if !expBackoffSuccess { totalCount += idx.BetweenRowCount(l, r) } + + // If the current table row count has changed, we should scale the row count accordingly. + totalCount *= idx.GetIncreaseFactor(realtimeRowCount) + + // handling the out-of-range part + if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) { + increaseCount := realtimeRowCount - int64(idx.TotalRowCount()) + if increaseCount < 0 { + increaseCount = 0 + } + totalCount += idx.Histogram.outOfRangeRowCount(&l, &r, increaseCount) + } } - if totalCount > idx.TotalRowCount() { - totalCount = idx.TotalRowCount() + if totalCount > float64(realtimeRowCount) { + totalCount = float64(realtimeRowCount) } return totalCount, nil } @@ -1514,21 +1646,13 @@ func (coll *HistColl) NewHistCollBySelectivity(sc *stmtctx.StatementContext, sta } func (idx *Index) outOfRange(val types.Datum) bool { - outOfTopN := idx.TopN.outOfRange(val.GetBytes()) - // The val is in TopN, return false. - if !outOfTopN { + if !idx.Histogram.outOfRange(val) { return false } - - histEmpty := idx.Histogram.Len() == 0 - // HistEmpty->Hist out of range. - if histEmpty { - return true + if idx.Histogram.Len() > 0 && matchPrefix(idx.Bounds.GetRow(0), 0, &val) { + return false } - withInLowBoundOrPrefixMatch := chunk.Compare(idx.Bounds.GetRow(0), 0, &val) <= 0 || - matchPrefix(idx.Bounds.GetRow(0), 0, &val) - withInHighBound := chunk.Compare(idx.Bounds.GetRow(idx.Bounds.NumRows()-1), 0, &val) >= 0 - return !withInLowBoundOrPrefixMatch || !withInHighBound + return true } // matchPrefix checks whether ad is the prefix of value @@ -1994,7 +2118,8 @@ func MergePartitionHist2GlobalHist(sc *stmtctx.StatementContext, hists []*Histog for _, bucket := range globalBuckets { var repeat float64 for _, hist := range hists { - repeat += hist.equalRowCount(*bucket.upper, isIndex) // only hists of indexes have bucket.NDV + histRowCount, _ := hist.equalRowCount(*bucket.upper, isIndex) + repeat += histRowCount // only hists of indexes have bucket.NDV } if int64(repeat) > bucket.Repeat { bucket.Repeat = int64(repeat) diff --git a/statistics/scalar.go b/statistics/scalar.go index 25d0736197777..e8f585f450a32 100644 --- a/statistics/scalar.go +++ b/statistics/scalar.go @@ -45,6 +45,16 @@ func calcFraction(lower, upper, value float64) float64 { func convertDatumToScalar(value *types.Datum, commonPfxLen int) float64 { switch value.Kind() { + case types.KindFloat32: + return float64(value.GetFloat32()) + case types.KindFloat64: + return value.GetFloat64() + case types.KindInt64: + return float64(value.GetInt64()) + case types.KindUint64: + return float64(value.GetUint64()) + case types.KindMysqlDuration: + return float64(value.GetMysqlDuration().Duration) case types.KindMysqlDecimal: scalar, err := value.GetMysqlDecimal().ToFloat64() if err != nil { @@ -70,6 +80,10 @@ func convertDatumToScalar(value *types.Datum, commonPfxLen int) float64 { return 0 } return convertBytesToScalar(bytes[commonPfxLen:]) + case types.KindMinNotNull: + return -math.MaxFloat64 + case types.KindMaxValue: + return math.MaxFloat64 default: // do not know how to convert return 0 @@ -129,14 +143,22 @@ func (hg *Histogram) calcFraction(index int, value *types.Datum) float64 { return 0.5 } -func commonPrefixLength(lower, upper []byte) int { - minLen := len(lower) - if minLen > len(upper) { - minLen = len(upper) +func commonPrefixLength(strs ...[]byte) int { + if len(strs) == 0 { + return 0 + } + minLen := len(strs[0]) + for _, str := range strs { + if len(str) < minLen { + minLen = len(str) + } } for i := 0; i < minLen; i++ { - if lower[i] != upper[i] { - return i + a := strs[0][i] + for _, str := range strs { + if str[i] != a { + return i + } } } return minLen diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 5466da7d3132a..f8c551b11d0e6 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -243,44 +243,54 @@ func (s *testStatsSuite) TestSelectivity(c *C) { longExpr += fmt.Sprintf(" and a > %d ", i) } tests := []struct { - exprs string - selectivity float64 + exprs string + selectivity float64 + selectivityAfterIncrease float64 }{ { - exprs: "a > 0 and a < 2", - selectivity: 0.01851851851, + exprs: "a > 0 and a < 2", + selectivity: 0.01851851851, + selectivityAfterIncrease: 0.01851851851, }, { - exprs: "a >= 1 and a < 2", - selectivity: 0.01851851851, + exprs: "a >= 1 and a < 2", + selectivity: 0.01851851851, + selectivityAfterIncrease: 0.01851851851, }, { - exprs: "a >= 1 and b > 1 and a < 2", - selectivity: 0.01783264746, + exprs: "a >= 1 and b > 1 and a < 2", + selectivity: 0.01783264746, + selectivityAfterIncrease: 0.01851851852, }, { - exprs: "a >= 1 and c > 1 and a < 2", - selectivity: 0.00617283950, + exprs: "a >= 1 and c > 1 and a < 2", + selectivity: 0.00617283950, + selectivityAfterIncrease: 0.00617283950, }, { - exprs: "a >= 1 and c >= 1 and a < 2", - selectivity: 0.01234567901, + exprs: "a >= 1 and c >= 1 and a < 2", + selectivity: 0.01234567901, + selectivityAfterIncrease: 0.01234567901, }, { - exprs: "d = 0 and e = 1", - selectivity: 0.11111111111, + exprs: "d = 0 and e = 1", + selectivity: 0.11111111111, + selectivityAfterIncrease: 0.11111111111, }, { - exprs: "b > 1", - selectivity: 0.96296296296, + exprs: "b > 1", + selectivity: 0.96296296296, + selectivityAfterIncrease: 1, }, { - exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5", - selectivity: 0, + exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5", + selectivity: 0, + selectivityAfterIncrease: 0, }, { - exprs: longExpr, - selectivity: 0.001, + exprs: longExpr, + selectivity: 0.001, + selectivityAfterIncrease: 0.001, }, } @@ -311,7 +321,7 @@ func (s *testStatsSuite) TestSelectivity(c *C) { histColl.Count *= 10 ratio, _, err = histColl.Selectivity(sctx, sel.Conditions, nil) c.Assert(err, IsNil, comment) - c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)) + c.Assert(math.Abs(ratio-tt.selectivityAfterIncrease) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivityAfterIncrease, ratio)) } } @@ -372,16 +382,16 @@ func getRange(start, end int64) []*ranger.Range { return []*ranger.Range{ran} } -func (s *testStatsSuite) TestOutOfRangeEQEstimation(c *C) { +func (s *testStatsSuite) TestOutOfRangeEstimation(c *C) { defer cleanEnv(c, s.store, s.do) testKit := testkit.NewTestKit(c, s.store) testKit.MustExec("use test") testKit.MustExec("drop table if exists t") - testKit.MustExec("create table t(a int)") - for i := 0; i < 1000; i++ { - testKit.MustExec(fmt.Sprintf("insert into t values (%v)", i/4)) // 0 ~ 249 + testKit.MustExec("create table t(a int unsigned)") + for i := 0; i < 3000; i++ { + testKit.MustExec(fmt.Sprintf("insert into t values (%v)", i/5+300)) // [300, 900) } - testKit.MustExec("analyze table t") + testKit.MustExec("analyze table t with 2000 samples") h := s.do.StatsHandle() table, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) @@ -389,14 +399,34 @@ func (s *testStatsSuite) TestOutOfRangeEQEstimation(c *C) { statsTbl := h.GetTableStats(table.Meta()) sc := &stmtctx.StatementContext{} col := statsTbl.Columns[table.Meta().Columns[0].ID] - count, err := col.GetColumnRowCount(sc, getRange(250, 250), 0, false) + count, err := col.GetColumnRowCount(sc, getRange(900, 900), statsTbl.Count, false) c.Assert(err, IsNil) - c.Assert(count, Equals, float64(0)) + // Because the ANALYZE collect data by random sampling, so the result is not an accurate value. + // so we use a range here. + c.Assert(count < 5.5, IsTrue, Commentf("expected: around 5.0, got: %v", count)) + c.Assert(count > 4.5, IsTrue, Commentf("expected: around 5.0, got: %v", count)) - for i := 0; i < 8; i++ { - count, err := col.GetColumnRowCount(sc, getRange(250, 250), int64(i+1), false) + var input []struct { + Start int64 + End int64 + } + var output []struct { + Start int64 + End int64 + Count float64 + } + s.testData.GetTestCases(c, &input, &output) + increasedTblRowCount := int64(float64(statsTbl.Count) * 1.5) + for i, ran := range input { + count, err = col.GetColumnRowCount(sc, getRange(ran.Start, ran.End), increasedTblRowCount, false) c.Assert(err, IsNil) - c.Assert(count, Equals, math.Min(float64(i+1), 4)) // estRows must be less than modifyCnt + s.testData.OnRecord(func() { + output[i].Start = ran.Start + output[i].End = ran.End + output[i].Count = count + }) + c.Assert(count < output[i].Count*1.2, IsTrue, Commentf("for [%v, %v], needed: around %v, got: %v", ran.Start, ran.End, output[i].Count, count)) + c.Assert(count > output[i].Count*0.8, IsTrue, Commentf("for [%v, %v], needed: around %v, got: %v", ran.Start, ran.End, output[i].Count, count)) } } @@ -431,20 +461,20 @@ func (s *testStatsSuite) TestEstimationForUnknownValues(c *C) { count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(9, 30)) c.Assert(err, IsNil) - c.Assert(count, Equals, 2.4000000000000004) + c.Assert(count, Equals, 7.2) count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(9, math.MaxInt64)) c.Assert(err, IsNil) - c.Assert(count, Equals, 2.4000000000000004) + c.Assert(count, Equals, 7.2) idxID := table.Meta().Indices[0].ID count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(30, 30)) c.Assert(err, IsNil) - c.Assert(count, Equals, 0.2) + c.Assert(count, Equals, 0.1) count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(9, 30)) c.Assert(err, IsNil) - c.Assert(count, Equals, 2.2) + c.Assert(count, Equals, 7.0) testKit.MustExec("truncate table t") testKit.MustExec("insert into t values (null, null)") diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 2b9f9771ed12a..a6f1d31afbec7 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -259,7 +259,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { checkRepeats(c, col) col.PreCalculateScalar() c.Check(col.Len(), Equals, 226) - count := col.equalRowCount(types.NewIntDatum(1000), false) + count, _ := col.equalRowCount(types.NewIntDatum(1000), false) c.Check(int(count), Equals, 0) count = col.lessRowCount(types.NewIntDatum(1000)) c.Check(int(count), Equals, 10000) @@ -271,7 +271,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { c.Check(int(count), Equals, 100000) count = col.greaterRowCount(types.NewIntDatum(200000000)) c.Check(count, Equals, 0.0) - count = col.equalRowCount(types.NewIntDatum(200000000), false) + count, _ = col.equalRowCount(types.NewIntDatum(200000000), false) c.Check(count, Equals, 0.0) count = col.BetweenRowCount(types.NewIntDatum(3000), types.NewIntDatum(3500)) c.Check(int(count), Equals, 4994) @@ -327,7 +327,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { checkRepeats(c, col) col.PreCalculateScalar() c.Check(int(tblCount), Equals, 100000) - count = col.equalRowCount(encodeKey(types.NewIntDatum(10000)), false) + count, _ = col.equalRowCount(encodeKey(types.NewIntDatum(10000)), false) c.Check(int(count), Equals, 1) count = col.lessRowCount(encodeKey(types.NewIntDatum(20000))) c.Check(int(count), Equals, 19999) @@ -344,7 +344,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { checkRepeats(c, col) col.PreCalculateScalar() c.Check(int(tblCount), Equals, 100000) - count = col.equalRowCount(types.NewIntDatum(10000), false) + count, _ = col.equalRowCount(types.NewIntDatum(10000), false) c.Check(int(count), Equals, 1) count = col.lessRowCount(types.NewIntDatum(20000)) c.Check(int(count), Equals, 20000) @@ -639,7 +639,7 @@ func (s *testStatisticsSuite) TestIntColumnRanges(c *C) { tbl.Count *= 10 count, err = tbl.GetRowCountByIntColumnRanges(sc, 0, ran) c.Assert(err, IsNil) - c.Assert(int(count), Equals, 10) + c.Assert(int(count), Equals, 1) } func (s *testStatisticsSuite) TestIndexRanges(c *C) { diff --git a/statistics/table.go b/statistics/table.go index 6c406514564cf..93887b28d0270 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -340,8 +340,7 @@ func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, } return getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.Count)), nil } - result, err := c.GetColumnRowCount(sc, intRanges, coll.ModifyCount, true) - result *= c.GetIncreaseFactor(coll.Count) + result, err := c.GetColumnRowCount(sc, intRanges, coll.Count, true) return result, errors.Trace(err) } @@ -351,8 +350,7 @@ func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, co if !ok || c.IsInvalid(sc, coll.Pseudo) { return GetPseudoRowCountByColumnRanges(sc, float64(coll.Count), colRanges, 0) } - result, err := c.GetColumnRowCount(sc, colRanges, coll.ModifyCount, false) - result *= c.GetIncreaseFactor(coll.Count) + result, err := c.GetColumnRowCount(sc, colRanges, coll.Count, false) return result, errors.Trace(err) } @@ -371,9 +369,8 @@ func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idx if idx.CMSketch != nil && idx.StatsVer == Version1 { result, err = coll.getIndexRowCount(sc, idxID, indexRanges) } else { - result, err = idx.GetRowCount(sc, coll, indexRanges, coll.ModifyCount) + result, err = idx.GetRowCount(sc, coll, indexRanges, coll.Count) } - result *= idx.GetIncreaseFactor(coll.Count) return result, errors.Trace(err) } @@ -489,16 +486,17 @@ func isSingleColIdxNullRange(idx *Index, ran *ranger.Range) bool { // It assumes all modifications are insertions and all new-inserted rows are uniformly distributed // and has the same distribution with analyzed rows, which means each unique value should have the // same number of rows(Tot/NDV) of it. -func outOfRangeEQSelectivity(ndv, modifyRows, totalRows int64) float64 { - if modifyRows == 0 { +func outOfRangeEQSelectivity(ndv, realtimeRowCount, columnRowCount int64) float64 { + increaseRowCount := realtimeRowCount - columnRowCount + if increaseRowCount <= 0 { return 0 // it must be 0 since the histogram contains the whole data } if ndv < outOfRangeBetweenRate { ndv = outOfRangeBetweenRate // avoid inaccurate selectivity caused by small NDV } - selectivity := 1 / float64(ndv) // TODO: After extracting TopN from histograms, we can minus the TopN fraction here. - if selectivity*float64(totalRows) > float64(modifyRows) { - selectivity = float64(modifyRows) / float64(totalRows) + selectivity := 1 / float64(ndv) + if selectivity*float64(columnRowCount) > float64(increaseRowCount) { + selectivity = float64(increaseRowCount) / float64(columnRowCount) } return selectivity } @@ -536,7 +534,7 @@ func (coll *HistColl) crossValidationSelectivity(sc *stmtctx.StatementContext, i HighExclude: highExclude, } - rowCount, err := col.GetColumnRowCount(sc, []*ranger.Range{&rang}, coll.ModifyCount, col.IsHandle) + rowCount, err := col.GetColumnRowCount(sc, []*ranger.Range{&rang}, coll.Count, col.IsHandle) if err != nil { return 0, 0, err } @@ -562,7 +560,7 @@ func (coll *HistColl) getEqualCondSelectivity(sc *stmtctx.StatementContext, idx // When the value is out of range, we could not found this value in the CM Sketch, // so we use heuristic methods to estimate the selectivity. if idx.NDV > 0 && coverAll { - return outOfRangeEQSelectivity(idx.NDV, coll.ModifyCount, int64(idx.TotalRowCount())), nil + return outOfRangeEQSelectivity(idx.NDV, coll.Count, int64(idx.TotalRowCount())), nil } // The equal condition only uses prefix columns of the index. colIDs := coll.Idx2ColumnIDs[idx.ID] @@ -575,7 +573,7 @@ func (coll *HistColl) getEqualCondSelectivity(sc *stmtctx.StatementContext, idx ndv = mathutil.MaxInt64(ndv, col.Histogram.NDV) } } - return outOfRangeEQSelectivity(ndv, coll.ModifyCount, int64(idx.TotalRowCount())), nil + return outOfRangeEQSelectivity(ndv, coll.Count, int64(idx.TotalRowCount())), nil } minRowCount, crossValidationSelectivity, err := coll.crossValidationSelectivity(sc, idx, usedColsLen, idxPointRange) @@ -607,7 +605,7 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64 // on single-column index, use previous way as well, because CMSketch does not contain null // values in this case. if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) { - count, err := idx.GetRowCount(sc, nil, []*ranger.Range{ran}, coll.ModifyCount) + count, err := idx.GetRowCount(sc, nil, []*ranger.Range{ran}, coll.Count) if err != nil { return 0, errors.Trace(err) } diff --git a/statistics/testdata/stats_suite_in.json b/statistics/testdata/stats_suite_in.json index 02e6e2e03a127..ff76b09ac4c15 100644 --- a/statistics/testdata/stats_suite_in.json +++ b/statistics/testdata/stats_suite_in.json @@ -170,5 +170,94 @@ "End": 25 } ] + }, + { + "Name": "TestOutOfRangeEstimation", + "Cases": [ + { + "Start": 800, + "End": 900 + }, + { + "Start": 900, + "End": 950 + }, + { + "Start": 950, + "End": 1000 + }, + { + "Start": 1000, + "End": 1050 + }, + { + "Start": 1050, + "End": 1100 + }, + { + "Start": 1150, + "End": 1200 + }, + { + "Start": 1200, + "End": 1300 + }, + { + "Start": 1300, + "End": 1400 + }, + { + "Start": 1400, + "End": 1500 + }, + { + "Start": 1500, + "End": 1600 + }, + { + "Start": 300, + "End": 899 + }, + { + "Start": 800, + "End": 1000 + }, + { + "Start": 900, + "End": 1500 + }, + { + "Start": 300, + "End": 1500 + }, + { + "Start": 200, + "End": 300 + }, + { + "Start": 100, + "End": 200 + }, + { + "Start": 200, + "End": 400 + }, + { + "Start": 200, + "End": 1000 + }, + { + "Start": 0, + "End": 100 + }, + { + "Start": -100, + "End": 100 + }, + { + "Start": -100, + "End": 0 + } + ] } ] diff --git a/statistics/testdata/stats_suite_out.json b/statistics/testdata/stats_suite_out.json index 187ecdc0ce363..c4135c1f8b0b3 100644 --- a/statistics/testdata/stats_suite_out.json +++ b/statistics/testdata/stats_suite_out.json @@ -160,8 +160,8 @@ " └─TableFullScan_5 8.00 cop[tikv] table:tint keep order:false" ], [ - "TableReader_7 0.75 root data:Selection_6", - "└─Selection_6 0.75 cop[tikv] eq(test.tint.a, 4)", + "TableReader_7 1.00 root data:Selection_6", + "└─Selection_6 1.00 cop[tikv] eq(test.tint.a, 4)", " └─TableFullScan_5 8.00 cop[tikv] table:tint keep order:false" ], [ @@ -175,9 +175,9 @@ "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdouble keep order:false" ], [ - "IndexLookUp_10 0.75 root ", - "├─IndexRangeScan_8(Build) 0.75 cop[tikv] table:tdouble, index:singular(a) range:[4,4], keep order:false", - "└─TableRowIDScan_9(Probe) 0.75 cop[tikv] table:tdouble keep order:false" + "IndexLookUp_10 1.00 root ", + "├─IndexRangeScan_8(Build) 1.00 cop[tikv] table:tdouble, index:singular(a) range:[4,4], keep order:false", + "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdouble keep order:false" ], [ "IndexLookUp_10 1.00 root ", @@ -190,9 +190,9 @@ "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdecimal keep order:false" ], [ - "IndexLookUp_10 0.75 root ", - "├─IndexRangeScan_8(Build) 0.75 cop[tikv] table:tdecimal, index:singular(a) range:[4.00000000000000000000,4.00000000000000000000], keep order:false", - "└─TableRowIDScan_9(Probe) 0.75 cop[tikv] table:tdecimal keep order:false" + "IndexLookUp_10 1.00 root ", + "├─IndexRangeScan_8(Build) 1.00 cop[tikv] table:tdecimal, index:singular(a) range:[4.00000000000000000000,4.00000000000000000000], keep order:false", + "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdecimal keep order:false" ], [ "IndexLookUp_10 1.00 root ", @@ -205,8 +205,8 @@ " └─TableFullScan_5 8.00 cop[tikv] table:tstring keep order:false" ], [ - "TableReader_7 0.75 root data:Selection_6", - "└─Selection_6 0.75 cop[tikv] eq(test.tstring.a, \"4\")", + "TableReader_7 1.00 root data:Selection_6", + "└─Selection_6 1.00 cop[tikv] eq(test.tstring.a, \"4\")", " └─TableFullScan_5 8.00 cop[tikv] table:tstring keep order:false" ], [ @@ -240,10 +240,9 @@ " └─TableFullScan_5 6.00 cop[tikv] table:tprefix keep order:false" ], [ - "IndexLookUp_11 0.00 root ", - "├─IndexRangeScan_8(Build) 0.00 cop[tikv] table:tprefix, index:prefixa(a) range:[\"88\",\"88\"], keep order:false", - "└─Selection_10(Probe) 0.00 cop[tikv] eq(test.tprefix.a, \"888\")", - " └─TableRowIDScan_9 0.00 cop[tikv] table:tprefix keep order:false" + "TableReader_7 1.00 root data:Selection_6", + "└─Selection_6 1.00 cop[tikv] eq(test.tprefix.a, \"888\")", + " └─TableFullScan_5 6.00 cop[tikv] table:tprefix keep order:false" ], [ "TableReader_7 1.00 root data:Selection_6", @@ -251,8 +250,8 @@ " └─TableFullScan_5 8.00 cop[tikv] table:tint keep order:false" ], [ - "TableReader_7 0.75 root data:Selection_6", - "└─Selection_6 0.75 cop[tikv] eq(test.tint.b, 4), eq(test.tint.c, 4)", + "TableReader_7 1.00 root data:Selection_6", + "└─Selection_6 1.00 cop[tikv] eq(test.tint.b, 4), eq(test.tint.c, 4)", " └─TableFullScan_5 8.00 cop[tikv] table:tint keep order:false" ], [ @@ -266,9 +265,9 @@ "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdouble keep order:false" ], [ - "IndexLookUp_10 0.75 root ", - "├─IndexRangeScan_8(Build) 0.75 cop[tikv] table:tdouble, index:multi(b, c) range:[4 4,4 4], keep order:false", - "└─TableRowIDScan_9(Probe) 0.75 cop[tikv] table:tdouble keep order:false" + "IndexLookUp_10 1.00 root ", + "├─IndexRangeScan_8(Build) 1.00 cop[tikv] table:tdouble, index:multi(b, c) range:[4 4,4 4], keep order:false", + "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdouble keep order:false" ], [ "IndexLookUp_10 1.00 root ", @@ -281,9 +280,9 @@ "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdecimal keep order:false" ], [ - "IndexLookUp_10 0.75 root ", - "├─IndexRangeScan_8(Build) 0.75 cop[tikv] table:tdecimal, index:multi(b, c) range:[4.00000000000000000000 4.00000000000000000000,4.00000000000000000000 4.00000000000000000000], keep order:false", - "└─TableRowIDScan_9(Probe) 0.75 cop[tikv] table:tdecimal keep order:false" + "IndexLookUp_10 1.00 root ", + "├─IndexRangeScan_8(Build) 1.00 cop[tikv] table:tdecimal, index:multi(b, c) range:[4.00000000000000000000 4.00000000000000000000,4.00000000000000000000 4.00000000000000000000], keep order:false", + "└─TableRowIDScan_9(Probe) 1.00 cop[tikv] table:tdecimal keep order:false" ], [ "IndexLookUp_10 1.00 root ", @@ -296,8 +295,8 @@ " └─TableFullScan_5 8.00 cop[tikv] table:tstring keep order:false" ], [ - "TableReader_7 0.75 root data:Selection_6", - "└─Selection_6 0.75 cop[tikv] eq(test.tstring.b, \"4\"), eq(test.tstring.c, \"4\")", + "TableReader_7 1.00 root data:Selection_6", + "└─Selection_6 1.00 cop[tikv] eq(test.tstring.b, \"4\"), eq(test.tstring.c, \"4\")", " └─TableFullScan_5 8.00 cop[tikv] table:tstring keep order:false" ], [ @@ -457,16 +456,16 @@ "└─IndexRangeScan_5 4.00 cop[tikv] table:topn_before_hist, index:idx(a) range:[1,1], keep order:false" ], [ - "IndexReader_6 0.00 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 0.00 cop[tikv] table:topn_before_hist, index:idx(a) range:[2,2], keep order:false" + "IndexReader_6 1.00 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 1.00 cop[tikv] table:topn_before_hist, index:idx(a) range:[2,2], keep order:false" ], [ "IndexReader_6 4.00 root index:IndexRangeScan_5", "└─IndexRangeScan_5 4.00 cop[tikv] table:topn_after_hist, index:idx(a) range:[7,7], keep order:false" ], [ - "IndexReader_6 0.00 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 0.00 cop[tikv] table:topn_after_hist, index:idx(a) range:[6,6], keep order:false" + "IndexReader_6 1.00 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 1.00 cop[tikv] table:topn_after_hist, index:idx(a) range:[6,6], keep order:false" ], [ "TableReader_7 4.00 root data:Selection_6", @@ -650,5 +649,115 @@ "Count": 3 } ] + }, + { + "Name": "TestOutOfRangeEstimation", + "Cases": [ + { + "Start": 800, + "End": 900, + "Count": 743.004166655054 + }, + { + "Start": 900, + "End": 950, + "Count": 247.04782734719248 + }, + { + "Start": 950, + "End": 1000, + "Count": 226.14487557169574 + }, + { + "Start": 1000, + "End": 1050, + "Count": 205.24192379619902 + }, + { + "Start": 1050, + "End": 1100, + "Count": 184.33897202070227 + }, + { + "Start": 1150, + "End": 1200, + "Count": 142.53306846970884 + }, + { + "Start": 1200, + "End": 1300, + "Count": 214.85728161292752 + }, + { + "Start": 1300, + "End": 1400, + "Count": 131.2454745109406 + }, + { + "Start": 1400, + "End": 1500, + "Count": 47.650389770374105 + }, + { + "Start": 1500, + "End": 1600, + "Count": 7.5 + }, + { + "Start": 300, + "End": 899, + "Count": 4498.5 + }, + { + "Start": 800, + "End": 1000, + "Count": 1201.196869573942 + }, + { + "Start": 900, + "End": 1500, + "Count": 1502.495833344946 + }, + { + "Start": 300, + "End": 1500, + "Count": 4500 + }, + { + "Start": 200, + "End": 300, + "Count": 466.52882098990807 + }, + { + "Start": 100, + "End": 200, + "Count": 382.91701388792114 + }, + { + "Start": 200, + "End": 400, + "Count": 1211.5288209899081 + }, + { + "Start": 200, + "End": 1000, + "Count": 4500 + }, + { + "Start": 0, + "End": 100, + "Count": 299.3052067859343 + }, + { + "Start": -100, + "End": 100, + "Count": 299.3052067859343 + }, + { + "Start": -100, + "End": 0, + "Count": 7.5 + } + ] } ]