Вот пример стратификации в Snowflake (или SQL), основанный на следующем:
https://en.wikipedia.org/wiki/Stratified_sampling
Это может быть возвращено как фиксированное число или процент.
Это было выполнено как один запрос, в нашей реализации мы фактически предварительно создали таблицу столбцов отсортированных сегментов (W0) как временную таблицу вместо того, чтобы один и тот же запрос выполнялся несколько раз.
SELECT
W1.Id,
W1.EmploymentStatus,
W1.Gender
FROM
( SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF ) W0,
STAFF W1 -- Linked back the original table (Inbound query)
WHERE (
SELECT
MAX(case when W2.sInternalGroupVal = W3.sInternalGroupVal then W3.iGroupSegmentVolume else 0 end ) -- This is where the magic happens...
FROM (
SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF ) W2,
(SELECT
sInternalGroupVal,
COUNT(Id)*(40/iTotalPopulation::DOUBLE PRECISION) as iGroupSegmentVolume -- as a fixed volumne (40 Records)
--COUNT(Id)*((iTotalPopulation*(23/100.00))/iTotalPopulation::DOUBLE PRECISION) as iGroupSegmentVolume -- as a percentage (23% of overall population)
FROM (SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF ),
(SELECT
COUNT(Id) as iTotalPopulation
FROM ( SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF )
) W4
GROUP BY sInternalGroupVal, iTotalPopulation) W3
WHERE ((W2.sInternalGroupVal = W0.sInternalGroupVal)) AND ((W2.sInternalGroupVal = W3.sInternalGroupVal))
) >= iInternalRank
AND ((W1.Id = W0.Id));
person
Paul Sheppard
schedule
20.03.2021