wiki:recursive query to find spatial duplicates

Version 2 (modified by cedric, 6 years ago) (diff)

--

Recursive query to find duplicates

/*
Dans la récurisve ci dessous la premire partie cree un tableau avec (id1 id2 s_bruto)
Ou les id1 sont à moins de xx m de distance de id2
Plusieurs ouvrages peuvent être à distance proche...

Dans la deuxième partie, récursive, (search_path est la vraie récursive mais il faut mettre récursive en tête du with)
on fait une jointure entre id2 et id1 qui s'arrête quand il n'y a plus de lien. Cet arrêt est dans la colonne cycle.
Un petit truc que j'ai mis du temps à trouver c'est le ::text, sinon j'avais des incompatible type character varying 10
et character varying, et si j'essayais de caster en character varying je perdais array, et si j'assayais character varying[] ça
marchait pas...

Ci dessous il y a plusieurs niveaux d'agréggation, par exemple chain5_et_ses_petits correspond à 
id1;id2;sbruto_m;depth;path
SO_180;SO_181;;5;{SO_188,SO_187,SO_183,SO_181,SO_180}
SO_180;SO_181;;4;{SO_187,SO_183,SO_181,SO_180}
SO_181;SO_183;;4;{SO_188,SO_187,SO_183,SO_181}
SO_180;SO_183;;4;{SO_188,SO_187,SO_183,SO_180}
SO_180;SO_181;;4;{SO_187,SO_183,SO_181,SO_180}
SO_180;SO_181;;4;{SO_188,SO_183,SO_181,SO_180}
SO_180;SO_181;;4;{SO_188,SO_187,SO_181,SO_180}
SO_180;SO_183;;3;{SO_187,SO_183,SO_180}
SO_180;SO_181;;3;{SO_187,SO_181,SO_180}
SO_183;SO_187;;3;{SO_188,SO_187,SO_183}
SO_181;SO_187;;3;{SO_188,SO_187,SO_181}
SO_180;SO_187;;3;{SO_188,SO_187,SO_180}
SO_181;SO_183;;3;{SO_188,SO_183,SO_181}
SO_180;SO_183;;3;{SO_188,SO_183,SO_180}
SO_180;SO_181;;3;{SO_188,SO_181,SO_180}
SO_180;SO_181;;3;{SO_183,SO_181,SO_180}
SO_180;SO_181;;3;{SO_183,SO_181,SO_180}
SO_180;SO_181;;3;{SO_183,SO_181,SO_180}
SO_181;SO_183;;3;{SO_187,SO_183,SO_181}
SO_180;SO_183;;3;{SO_187,SO_183,SO_180}
....
Donc j'enlève ensuite toutes les lignes contenant un élement de {SO_188,SO_187,SO_183,SO_181,SO_180}
Puis je continue à l'étape 4 ...

*/

WITH grouped_array as(
WITH RECURSIVE graph as (SELECT sp1.id AS id1, sp2.id AS id2 FROM 
        sudoang.spain_obstruction_in_spain AS sp1 JOIN
        sudoang.spain_obstruction_in_spain AS sp2 ON
        ST_dwithin(sp1.geom, sp2.geom,10)
        where sp1.id != sp2.id 
        and substring(sp1.id from 4) < substring(sp2.id from 4)
        order by substring(sp1.id from 4)::numeric,
        substring(sp2.id from 4)::numeric),
        
 search_graph(id1, id2, depth, path, cycle) AS (
        SELECT g.id1, g.id2,  1,
          ARRAY[g.id1::text],
          false
        from graph g
      UNION ALL
        SELECT g.id1, g.id2, sg.depth + 1,
          path || ARRAY[g.id1::text],
          sg.id2 in (select g.id1 from graph)
        FROM graph g, search_graph sg
        WHERE g.id2 = sg.id1 
        AND NOT cycle
)
SELECT  id1, id2, path, depth -- (cycle) j'ai plus besoin de cycle qui servait dans la partie récursive 
FROM search_graph order by depth desc 
),--grouped_array,
chain5 as (select * from grouped_array where depth =5),
chain4 as (select * from grouped_array where depth =4),
chain3 as (select * from grouped_array where depth =3),
chain2 as (select * from grouped_array where depth =2),

chain5_et_ses_petits as (
select grouped_array.* from chain5 JOIN grouped_array on chain5.path && grouped_array.path),

chain4_et_ses_petits as (
select grouped_array.* from chain4 JOIN grouped_array on chain4.path && grouped_array.path
where chain4.path not in (select path from chain5_et_ses_petits)),

chain3_et_ses_petits as (
select grouped_array.* from chain3 JOIN grouped_array on chain3.path && grouped_array.path
where chain3.path not in (select path from chain4_et_ses_petits)
and chain3.path not in (select path from chain5_et_ses_petits)),

chain2_et_ses_petits as (
select grouped_array.* from chain2 JOIN grouped_array on chain2.path && grouped_array.path
where chain2.path not in (select path from chain3_et_ses_petits)
and chain2.path not in (select path from chain4_et_ses_petits)
and chain2.path not in (select path from chain5_et_ses_petits)),

chain1 as (
select * from grouped_array where depth=1
and grouped_array.path not in (select path from chain2_et_ses_petits)
and grouped_array.path not in (select path from chain3_et_ses_petits)
and grouped_array.path not in (select path from chain4_et_ses_petits)
and grouped_array.path not in (select path from chain5_et_ses_petits)),

bigunion as (
select * from chain5_et_ses_petits where depth=5
UNION
select * from chain4_et_ses_petits where depth=4
UNION
select * from chain3_et_ses_petits where depth=3
UNION
select * from chain2_et_ses_petits where depth=2
UNION
select * from chain1)

select * from bigunion order by depth desc, id1

/*
id1;id2;path;depth
SO_180;SO_181;{SO_188,SO_187,SO_183,SO_181,SO_180};5
SO_2158;SO_2161;{SO_2164,SO_2162,SO_2161,SO_2158};4
SO_14;SO_15;{SO_16,SO_15,SO_14};3
SO_2195;SO_2196;{SO_2197,SO_2196,SO_2195};3
SO_2697;SO_2698;{SO_2700,SO_2698,SO_2697};3
SO_3049;SO_3050;{SO_3051,SO_3050,SO_3049};3
SO_1472;SO_1945;{SO_1945,SO_1472};2
SO_2641;SO_2642;{SO_2642,SO_2641};2
SO_2682;SO_2683;{SO_2683,SO_2682};2
SO_3084;SO_3086;{SO_3086,SO_3084};2
SO_1046;SO_1252;{SO_1046};1
SO_1047;SO_120;{SO_1047};1
SO_1167;SO_3021;{SO_1167};1
SO_118;SO_119;{SO_118};1
SO_1217;SO_1398;{SO_1217};1
SO_1234;SO_1235;{SO_1234};1
SO_1243;SO_3102;{SO_1243};1
SO_1282;SO_1283;{SO_1282};1
SO_1299;SO_1300;{SO_1299};1
SO_1302;SO_1303;{SO_1302};1
SO_1309;SO_2232;{SO_1309};1
SO_1312;SO_3276;{SO_1312};1
SO_1504;SO_3308;{SO_1504};1
SO_1518;SO_1519;{SO_1518};1
SO_1567;SO_1568;{SO_1567};1
SO_1635;SO_1637;{SO_1635};1
SO_1852;SO_3344;{SO_1852};1
SO_1868;SO_1869;{SO_1868};1
SO_1902;SO_1903;{SO_1902};1
SO_198;SO_491;{SO_198};1
SO_2059;SO_2060;{SO_2059};1
SO_2064;SO_3377;{SO_2064};1
SO_2075;SO_2076;{SO_2075};1
SO_2129;SO_2134;{SO_2129};1
SO_2141;SO_3384;{SO_2141};1
SO_2186;SO_2187;{SO_2186};1
SO_2200;SO_2202;{SO_2200};1
SO_229;SO_230;{SO_229};1
SO_2299;SO_3412;{SO_2299};1
SO_2303;SO_3413;{SO_2303};1
SO_2320;SO_2321;{SO_2320};1
SO_2323;SO_2324;{SO_2323};1
SO_2340;SO_2341;{SO_2340};1
SO_2366;SO_2367;{SO_2366};1
SO_2393;SO_2395;{SO_2393};1
SO_2396;SO_2397;{SO_2396};1
SO_2450;SO_2451;{SO_2450};1
SO_2507;SO_2594;{SO_2507};1
SO_2567;SO_2569;{SO_2567};1
SO_2585;SO_2586;{SO_2585};1
SO_2770;SO_2771;{SO_2770};1
SO_2812;SO_2822;{SO_2812};1
SO_2842;SO_2843;{SO_2842};1
SO_2847;SO_2848;{SO_2847};1
SO_288;SO_874;{SO_288};1
SO_2915;SO_2923;{SO_2915};1
SO_2916;SO_2924;{SO_2916};1
SO_292;SO_79;{SO_292};1
SO_2979;SO_2980;{SO_2979};1
SO_2981;SO_2983;{SO_2981};1
SO_3026;SO_3027;{SO_3026};1
SO_3081;SO_962;{SO_3081};1
SO_313;SO_315;{SO_313};1
SO_3150;SO_3153;{SO_3150};1
SO_3170;SO_3171;{SO_3170};1
SO_3185;SO_3614;{SO_3185};1
SO_3329;SO_3330;{SO_3329};1
SO_3386;SO_3387;{SO_3386};1
SO_339;SO_340;{SO_339};1
SO_3460;SO_3461;{SO_3460};1
SO_3532;SO_3533;{SO_3532};1
SO_3619;SO_3620;{SO_3619};1
SO_374;SO_382;{SO_374};1
SO_42;SO_44;{SO_42};1
SO_456;SO_457;{SO_456};1
SO_969;SO_971;{SO_969};1
*/