wiki:recursive query to find spatial duplicates

Version 3 (modified by cedric, 6 years ago) (diff)

--

Recursive query to find duplicates

This queries finds duplicates for dams distance of less than 10 m. Those will have to be grouped according the lowest non null water dam height.

/*
Dans la récurisve ci dessous la premire partie cree un tableau avec (id1 id2 s_bruto)
Ou les id1 sont à moins de xx m de distance de id2
Plusieurs ouvrages peuvent être à distance proche...

Dans la deuxième partie, récursive, (search_path est la vraie récursive mais il faut mettre récursive en tête du with)
on fait une jointure entre id2 et id1 qui s'arrête quand il n'y a plus de lien. Cet arrêt est dans la colonne cycle.
Un petit truc que j'ai mis du temps à trouver c'est le ::text, sinon j'avais des incompatible type character varying 10
et character varying, et si j'essayais de caster en character varying je perdais array, et si j'assayais character varying[] ça
marchait pas...

Ci dessous il y a plusieurs niveaux d'agréggation, par exemple chain5_et_ses_petits correspond à 
id1;id2;sbruto_m;depth;path
SO_180;SO_181;;5;{SO_188,SO_187,SO_183,SO_181,SO_180}
SO_180;SO_181;;4;{SO_187,SO_183,SO_181,SO_180}
SO_181;SO_183;;4;{SO_188,SO_187,SO_183,SO_181}
SO_180;SO_183;;4;{SO_188,SO_187,SO_183,SO_180}
SO_180;SO_181;;4;{SO_187,SO_183,SO_181,SO_180}
SO_180;SO_181;;4;{SO_188,SO_183,SO_181,SO_180}
SO_180;SO_181;;4;{SO_188,SO_187,SO_181,SO_180}
SO_180;SO_183;;3;{SO_187,SO_183,SO_180}
SO_180;SO_181;;3;{SO_187,SO_181,SO_180}
SO_183;SO_187;;3;{SO_188,SO_187,SO_183}
SO_181;SO_187;;3;{SO_188,SO_187,SO_181}
SO_180;SO_187;;3;{SO_188,SO_187,SO_180}
SO_181;SO_183;;3;{SO_188,SO_183,SO_181}
SO_180;SO_183;;3;{SO_188,SO_183,SO_180}
SO_180;SO_181;;3;{SO_188,SO_181,SO_180}
SO_180;SO_181;;3;{SO_183,SO_181,SO_180}
SO_180;SO_181;;3;{SO_183,SO_181,SO_180}
SO_180;SO_181;;3;{SO_183,SO_181,SO_180}
SO_181;SO_183;;3;{SO_187,SO_183,SO_181}
SO_180;SO_183;;3;{SO_187,SO_183,SO_180}
....
Donc j'enlève ensuite toutes les lignes contenant un élement de {SO_188,SO_187,SO_183,SO_181,SO_180}
Puis je continue à l'étape 4 ...

*/

WITH grouped_array as(
WITH RECURSIVE graph as (SELECT sp1.id AS id1, sp2.id AS id2 FROM 
        sudoang.spain_obstruction_in_spain AS sp1 JOIN
        sudoang.spain_obstruction_in_spain AS sp2 ON
        ST_dwithin(sp1.geom, sp2.geom,10)
        where sp1.id != sp2.id 
        and substring(sp1.id from 4) < substring(sp2.id from 4)
        order by substring(sp1.id from 4)::numeric,
        substring(sp2.id from 4)::numeric),
        
 search_graph(id1, id2, depth, path, cycle) AS (
        SELECT g.id1, g.id2,  1,
          ARRAY[g.id1::text],
          false
        from graph g
      UNION ALL
        SELECT g.id1, g.id2, sg.depth + 1,
          path || ARRAY[g.id1::text],
          sg.id2 in (select g.id1 from graph)
        FROM graph g, search_graph sg
        WHERE g.id2 = sg.id1 
        AND NOT cycle
)
SELECT  id1, id2, path, depth -- (cycle) j'ai plus besoin de cycle qui servait dans la partie récursive 
FROM search_graph order by depth desc 
),--grouped_array,
chain5 as (select * from grouped_array where depth =5),
chain4 as (select * from grouped_array where depth =4),
chain3 as (select * from grouped_array where depth =3),
chain2 as (select * from grouped_array where depth =2),

chain5_et_ses_petits as (
select grouped_array.* from chain5 JOIN grouped_array on chain5.path && grouped_array.path),

chain4_et_ses_petits as (
select grouped_array.* from chain4 JOIN grouped_array on chain4.path && grouped_array.path
where chain4.path not in (select path from chain5_et_ses_petits)),

chain3_et_ses_petits as (
select grouped_array.* from chain3 JOIN grouped_array on chain3.path && grouped_array.path
where chain3.path not in (select path from chain4_et_ses_petits)
and chain3.path not in (select path from chain5_et_ses_petits)),

chain2_et_ses_petits as (
select grouped_array.* from chain2 JOIN grouped_array on chain2.path && grouped_array.path
where chain2.path not in (select path from chain3_et_ses_petits)
and chain2.path not in (select path from chain4_et_ses_petits)
and chain2.path not in (select path from chain5_et_ses_petits)),

chain1 as (
select * from grouped_array where depth=1
and grouped_array.path not in (select path from chain2_et_ses_petits)
and grouped_array.path not in (select path from chain3_et_ses_petits)
and grouped_array.path not in (select path from chain4_et_ses_petits)
and grouped_array.path not in (select path from chain5_et_ses_petits)),

bigunion as (
select * from chain5_et_ses_petits where depth=5
UNION
select * from chain4_et_ses_petits where depth=4
UNION
select * from chain3_et_ses_petits where depth=3
UNION
select * from chain2_et_ses_petits where depth=2
UNION
select * from chain1)

select * from bigunion order by depth desc, id1

/*
id1;id2;path;depth
SO_180;SO_181;{SO_188,SO_187,SO_183,SO_181,SO_180};5
SO_2158;SO_2161;{SO_2164,SO_2162,SO_2161,SO_2158};4
SO_14;SO_15;{SO_16,SO_15,SO_14};3
SO_2195;SO_2196;{SO_2197,SO_2196,SO_2195};3
SO_2697;SO_2698;{SO_2700,SO_2698,SO_2697};3
SO_3049;SO_3050;{SO_3051,SO_3050,SO_3049};3
SO_1472;SO_1945;{SO_1945,SO_1472};2
SO_2641;SO_2642;{SO_2642,SO_2641};2
SO_2682;SO_2683;{SO_2683,SO_2682};2
SO_3084;SO_3086;{SO_3086,SO_3084};2
SO_1046;SO_1252;{SO_1046};1
SO_1047;SO_120;{SO_1047};1
SO_1167;SO_3021;{SO_1167};1
SO_118;SO_119;{SO_118};1
SO_1217;SO_1398;{SO_1217};1
SO_1234;SO_1235;{SO_1234};1
SO_1243;SO_3102;{SO_1243};1
SO_1282;SO_1283;{SO_1282};1
SO_1299;SO_1300;{SO_1299};1
SO_1302;SO_1303;{SO_1302};1
SO_1309;SO_2232;{SO_1309};1
SO_1312;SO_3276;{SO_1312};1
SO_1504;SO_3308;{SO_1504};1
SO_1518;SO_1519;{SO_1518};1
SO_1567;SO_1568;{SO_1567};1
SO_1635;SO_1637;{SO_1635};1
SO_1852;SO_3344;{SO_1852};1
SO_1868;SO_1869;{SO_1868};1
SO_1902;SO_1903;{SO_1902};1
SO_198;SO_491;{SO_198};1
SO_2059;SO_2060;{SO_2059};1
SO_2064;SO_3377;{SO_2064};1
SO_2075;SO_2076;{SO_2075};1
SO_2129;SO_2134;{SO_2129};1
SO_2141;SO_3384;{SO_2141};1
SO_2186;SO_2187;{SO_2186};1
SO_2200;SO_2202;{SO_2200};1
SO_229;SO_230;{SO_229};1
SO_2299;SO_3412;{SO_2299};1
SO_2303;SO_3413;{SO_2303};1
SO_2320;SO_2321;{SO_2320};1
SO_2323;SO_2324;{SO_2323};1
SO_2340;SO_2341;{SO_2340};1
SO_2366;SO_2367;{SO_2366};1
SO_2393;SO_2395;{SO_2393};1
SO_2396;SO_2397;{SO_2396};1
SO_2450;SO_2451;{SO_2450};1
SO_2507;SO_2594;{SO_2507};1
SO_2567;SO_2569;{SO_2567};1
SO_2585;SO_2586;{SO_2585};1
SO_2770;SO_2771;{SO_2770};1
SO_2812;SO_2822;{SO_2812};1
SO_2842;SO_2843;{SO_2842};1
SO_2847;SO_2848;{SO_2847};1
SO_288;SO_874;{SO_288};1
SO_2915;SO_2923;{SO_2915};1
SO_2916;SO_2924;{SO_2916};1
SO_292;SO_79;{SO_292};1
SO_2979;SO_2980;{SO_2979};1
SO_2981;SO_2983;{SO_2981};1
SO_3026;SO_3027;{SO_3026};1
SO_3081;SO_962;{SO_3081};1
SO_313;SO_315;{SO_313};1
SO_3150;SO_3153;{SO_3150};1
SO_3170;SO_3171;{SO_3170};1
SO_3185;SO_3614;{SO_3185};1
SO_3329;SO_3330;{SO_3329};1
SO_3386;SO_3387;{SO_3386};1
SO_339;SO_340;{SO_339};1
SO_3460;SO_3461;{SO_3460};1
SO_3532;SO_3533;{SO_3532};1
SO_3619;SO_3620;{SO_3619};1
SO_374;SO_382;{SO_374};1
SO_42;SO_44;{SO_42};1
SO_456;SO_457;{SO_456};1
SO_969;SO_971;{SO_969};1
*/