wiki:CookBook join BDMAP_CCM v2

Version 93 (modified by cedric, 14 years ago) (diff)

--

back to first page ..
back to Recipes for EDA CookBook Eda

How to join the BDMAP and CCM version 2

see Joining BDMAP and the CCM layer CookBook join BDMAP_CCM
for the first version.
The trouble is that some dams or electrofishing stations are sometimes badly projected
We should start at step 6

1. putting bd_carthage into postgis

below an example on how to load from latin1 database to postgis only hylcov_arc

cd C:\eda\couches_SIG\BDCarthage09
C:\"Program Files"\PostgreSQL\8.4\bin\psql -p 5433 -U postgres -c "CREATE DATABASE bd_carthage  WITH OWNER = postgres  template=postgis ENCODING = 'UTF8'  LC_COLLATE = 'French_France.1252'  LC_CTYPE = 'French_France.1252' CONNECTION LIMIT = -1;"
C:\"Program Files"\PostgreSQL\8.4\bin\psql -p 5433 -U postgres -c "COMMENT ON DATABASE bd_carthage IS 'database bd_carthage2009" 
C:\"Program Files"\PostgreSQL\8.4\bin\shp2pgsql -s 3035 -c -g the_geom -W LATIN1 -I hylcov_arc.shp hylcov_arc> hylcov_arc.sql 
C:\"Program Files"\PostgreSQL\8.4\bin\psql -d bd_carthage -h localhost -U postgres -p 5433 -f hylcov_arc.sql 

We also need the nodes from bd_carthage use downstream node instead

cd C:\eda\couches_SIG\BDCarthage09
C:\"Program Files"\PostgreSQL\8.4\bin\shp2pgsql -s 3035 -c -g the_geom -W LATIN1 -I hylcov_node.shp hylcov_node> hylcov_node.sql 
C:\"Program Files"\PostgreSQL\8.4\bin\psql -d bd_carthage -h localhost -U postgres -p 5433 -f hylcov_node.sql 

changing the projections (if the layer is not already in 3035 the first four lines can be avoided by putting -s 2154 in the previous box

-- les projections n'étaient pas en 3035, erreur ci dessus que je corrige
alter table hylcov_node drop constraint enforce_srid_the_geom ;
alter table hylcov_arc drop constraint enforce_srid_the_geom ;
update hylcov_node set the_geom=ST_SetSRID(the_geom, 2154);
update hylcov_arc set the_geom=ST_SetSRID(the_geom, 2154);
update hylcov_arc set the_geom=ST_Transform(the_geom,3035);
update hylcov_node set the_geom=ST_Transform(the_geom,3035);
alter table hylcov_node add constraint enforce_srid_the_geom CHECK (st_srid(the_geom) = 3035);
alter table hylcov_arc add constraint enforce_srid_the_geom CHECK (st_srid(the_geom) = 3035);

2. getting distance sea and distance source for bd_carthage

Below after some search, but we have to trust the last data in the bd_carthage... Do we really ? Coming back from the end of the calculations, I am now reversing and putting back Cédric's calculation of distance source.

load(paste(datawd,"/dataEDAcommun/chainage/parcours_source/noeud&troncon_final.RData",sep=""))
ex(colnames(noeuds_final))
noeuds_1<-read.delim(paste(datawd,"/dataEDAcommun/amorce_mer/bassin final/source/troncon_parcouru_source.txt",sep=""),sep="\t")
#write.table(noeuds_final,file=paste(datawd,"/dataEDAcommun/chainage/parcours_source/noeud_troncon_final.csv",sep=""),sep=";",row.names=FALSE,col.names=TRUE)
str(noeuds_final)
str(noeuds_1)
noeuds_final.1<-merge(noeuds_1[,c("nouveau_noeud_aval","dist_source_max_aval")],noeuds_final,by.x="nouveau_noeud_aval",by.y="Id_BDCARTHAGE")
plot(noeuds_final.1$dist_source_max_aval,noeuds_final.1$distance_source) # inquiétant non ?
str(noeuds_final.1)
noeuds_final.1[is.infinite(noeuds_final.1)]
write.table(noeuds_final.1,file=paste(datawd,"/dataEDAcommun/chainage/parcours_source/noeud_troncon_final_2.csv",sep=""),sep=";",row.names=FALSE,col.names=TRUE)
-- in bd_carthage database, here is the second version accounting for inclusion of firstly calculated distances source
-- in fact these have been also calculated by Laurent and seem really wrong
drop table noeud_troncon_final CASCADE;
create table noeud_troncon_final (
Id_BDCARTHAGE integer,
dist_source_max_aval numeric,
situation varchar(10), 
distance_mer numeric,
bassin integer,
noeud_mer integer,
niveau integer,
nb_confluence integer,
nb_defluence integer,
noeud_source integer,
distance_source numeric, 
distance_source_cumulee numeric,
statut varchar(30),
strahler integer,
niveau_defluence numeric,
distance_affluent_defluence numeric,
id_carthage_bis varchar(10),
type_noeud_source varchar(30));

COPY noeud_troncon_final from 'C:/base/noeud_troncon_final_2.csv' with CSV header delimiter as ';'  NULL as 'NA'
CREATE INDEX hylcov_node_ID_SOM
  ON hylcov_node
  USING btree
  (ID_SOM);

CREATE INDEX hylcov_arc_id_som_f
  ON hylcov_arc
  USING btree
  (id_som_f);

CREATE INDEX hylcov_arc_ID_SOM
  ON hylcov_node
  USING btree
  (ID_SOM);

CREATE INDEX noeud_troncon_final_Id_BDCARTHAGE
  ON noeud_troncon_final
  USING btree
  (Id_BDCARTHAGE);

drop view if exists hylcov_arc_dist;
create view hylcov_arc_dist as(
select * from hylcov_arc h left join  noeud_troncon_final n on id_som_f=Id_BDCARTHAGE );
-- troisième essai
-- Les min et max de noeud_source_total$Id_BDCARTHAGE et hylcov_node$id_som correspondent
-- importation de la table puis jointure avec la table des noeuds....
drop table if exists noeuds;
create table noeuds  (
Id_BDCARTHAGE integer,
situation varchar(10),
distance_mer numeric,
bassin integer,
noeud_mer integer,
niveau integer,
dist_source_max numeric,
dist_source_cum numeric,
noeud_source integer);
-- saving the table in c:/base in .csv format
COPY noeuds from 'C:/base/noeud_source_total.csv' with CSV header delimiter as ';'  NULL as 'NA'

-- first a trial on downstream segment see figure downstream segment
drop view if exists hylcov_arc_dist2;
create view hylcov_arc_dist2 as(
select * from hylcov_arc left join  noeuds on id_som_f=Id_BDCARTHAGE );

-- calculating distance for the upstream segment
drop table if exists hylcov_arc_dist2;
create table hylcov_arc_dist2 as(
select * from hylcov_arc left join  noeuds on id_som_i=Id_BDCARTHAGE );
-- On est obligé de faire la jointure avec le troncon amont autrement les troncons partant du cours principal se voient attribués
-- des distances source qui sont celles de l'axe. Par contre il faut rajouter à tous les tronçons la longueur du tronçon.
update hylcov_arc_dist2 set dist_source_max= dist_source_max+length/1000;

to shape

C:\"Program Files"\PostgreSQL\8.4\bin\pgsql2shp -f "C:\eda\exports_shape\hylcov_arc_dist" -p 5433 -u postgres -P postgres -g the_geom -r -k bd_carthage hylcov_arc_dist

distances from Laurent are ok but not on the whole river length
source:data/Docs/trac/bd_carthage/distance_source_hylcov_arc.png

distances from Laurent (2 nd trial not shown)... simply a mess ... possibly join pb
distances from Cédric joining downstream node
source:data/Docs/trac/bd_carthage/distance_source_cedric_noeud_aval.jpeg
distances from Cédric joining ustream node and adding the length of the segment
source:data/Docs/trac/bd_carthage/distance_source_cedric_noeud_amont.jpeg

3. Get distances into the stationsp2 table

31. Saving table stationsp2 from eda20 to bd_carthage

cd C:\eda\backup
C:\"Program Files"\PostgreSQL\8.4\bin\pg_dump  -U postgres -p 5433 -t bd_map.stationsp2 eda2.0> stationsp2.sql
C:\"Program Files"\PostgreSQL\8.4\bin\psql -d bd_carthage -p 5433 -U postgres -f stationsp2.sql
REM une erreur quand il essaye de mettre le schéma bd_map mais là on a pas besoin d'avoir les données dans un schéma

at this stage it is usefull to check the geometry column table and correct it eventually

32. project stationsp2 again and get table of all projections within 300

Some stations were missing...

  • first create two index to speed up queries

bdcarthage

CREATE INDEX indexhylcov_arc ON hylcov_arc
  USING GIST ( the_geom GIST_GEOMETRY_OPS );
CREATE INDEX indexStationsp2 ON stationsp2
  USING GIST ( the_geom GIST_GEOMETRY_OPS );

Joining the hylcovarc and stationsp2, the_geom is that of stationsp2

-- creation de la table bd_map_bd_carthage, je rajoute un the_geom a la requete pour pouvoir voir la table dans Qgis  
DROP TABLE IF EXISTS bd_map_bd_carthage;
CREATE TABLE bd_map_bd_carthage as (
     SELECT distinct on (st_codecsp) st_codecsp, distance_source,distance_mer,strahler,id_trhyd, min(distance) as distance, the_geom FROM (
               SELECT st_codecsp, n.distance_source,n.distance_mer,n.strahler ,a.id_trhyd ,CAST(distance(a.the_geom, s.the_geom) as  decimal(15,1)) as distance,s.the_geom
               FROM stationsp2 As s
               INNER JOIN hylcov_arc a ON ST_DWithin(a.the_geom, s.the_geom,300)
                LEFT JOIN noeud_troncon_final n ON a.id_som_f::numeric = n.id_bdcarthage
               WHERE s.the_geom IS NOT NULL
               ORDER BY st_codecsp) as sub
        GROUP BY st_codecsp, distance, distance_source,distance_mer,strahler, id_trhyd, the_geom
 );
alter table  bd_map_bd_carthage add column id serial;
-- mise à jour de la table geometry_columns
INSERT INTO geometry_columns(f_table_catalog, f_table_schema, f_table_name, f_geometry_column, coord_dimension, srid, "type")
SELECT '', 'public', 'bd_map_bd_carthage', 'the_geom', ST_CoordDim(the_geom), ST_SRID(the_geom), GeometryType(the_geom)
FROM bd_map_bd_carthage LIMIT 1;

-- creation d'index, clé primaire, et constraintes qui vont bien
alter table bd_map_bd_carthage add CONSTRAINT enforce_dims_the_geom CHECK (ndims(the_geom) = 2);
alter table bd_map_bd_carthage add  CONSTRAINT enforce_geotype_the_geom CHECK (geometrytype(the_geom) = 'POINT'::text OR the_geom IS NULL);
alter table bd_map_bd_carthage add  CONSTRAINT enforce_srid_the_geom CHECK (srid(the_geom) = 3035);
alter table bd_map_bd_carthage ADD CONSTRAINT pk_id PRIMARY KEY(id);
CREATE INDEX indexbd_map_bd_carthage ON bd_map_bd_carthage
  USING GIST ( the_geom GIST_GEOMETRY_OPS );

33. Saving back table stationsp2 from bd_carthage to eda

cd C:\eda\backup
C:\"Program Files"\PostgreSQL\8.4\bin\pg_dump  -U postgres -p 5433 -t bd_map_bd_carthage bd_carthage> bd_map_bd_carthage.sql
C:\"Program Files"\PostgreSQL\8.4\bin\psql -d eda2.0 -p 5433 -U postgres -f bd_map_bd_carthage.sql
C:\"Program Files"\PostgreSQL\8.4\bin\psql -d eda2.0 -p 5433 -U postgres -c "alter table bd_map_bd_carthage drop CONSTRAINT  pk_id;"
C:\"Program Files"\PostgreSQL\8.4\bin\psql -d eda2.0 -p 5433 -U postgres -c "alter table bd_map_bd_carthage ADD CONSTRAINT pk_id_bd_map_bd_carthage PRIMARY KEY(id);
C:\"Program Files"\PostgreSQL\8.4\bin\psql -d eda2.0 -p 5433 -U postgres -c "alter table bd_map_bd_carthage set schema bd_map"

4. define criteria helping to do the projection

Below this table does not only select minimum distance but all ccm segments which are within a 300 m distance from the electrofishing station.

DROP TABLE IF EXISTS bd_map.bdmap_ccm2;
CREATE TABLE bd_map.bdmap_ccm2 as (
               SELECT st_codecsp, r.cum_len_sea, r.distance_source, r.strahler as strahlerccm,gid ,CAST(distance(r.the_geom, s.the_geom) as  decimal(15,1)) as distance,s.the_geom 
               FROM bd_map.stationsp2 As s
               INNER JOIN  ccm21.riversegments r ON ST_DWithin(r.the_geom, s.the_geom,300)
               WHERE s.the_geom IS NOT NULL
               ORDER BY st_codecsp 

);
alter table bd_map.bdmap_ccm2 add column id serial;

The table from the request joining bd_map.bd_map_bd_carthage with the bd_map.bdmap_ccm2 is exported there :

select * from bd_map.bd_map_bd_carthage b join bd_map.bdmap_ccm2 c on b.st_codecsp=c.st_codecsp;

source:data/dataEDAccm/export_jointure_bdcarthage_ccm.csv
Some processing with R and some results We are trying to see if distance_source distance_sea and strahler are consistent between bd_carthage and ccm

# analyse des relations entre distance source et distance mer pour les projections sur la ccm
library(lattice)
library(ggplot2)
ccm_bd<-read.csv(paste(datawd,"/dataEDAccm/export_jointure_bdcarthage_ccm.csv",sep=""),sep=";",stringsAsFactors = FALSE)
str(ccm_bd)
ccm_bd$distance_source_ccm=ccm_bd$distance_source.1/1000
ccm_bd$cum_len_sea=ccm_bd$cum_len_sea/1000
ccm_bd$pbstrahler=ccm_bd$strahlerccm>(ccm_bd$strahler+1) # which strahler are larger in ccm than in bd_carthage, allowing diff of one 
ratio_dist_mer=ccm_bd$cum_len_sea/ccm_bd$distance_mer
ccm_bd$pb_ratio_dist_mer<-ratio_dist_mer>2&ccm_bd$cum_len_sea>100 # la distance mer est le tripe pour la ccm (peut arriver pour les faibles valeurs)
ratio_sources<-ccm_bd$distance_source_ccm/ccm_bd$distance_source&ccm_bd$cum_len_sea>100
ccm_bd$pbratiosource<-ratio_sources>2 # la distance source ccm est égale à trois fois la distance source bd_carthage

sum(ccm_bd$pbratiosource,na.rm=TRUE)/nrow(ccm_bd) # 16 % 2279
sum(ccm_bd$pb_ratio_dist_mer,na.rm=TRUE)/nrow(ccm_bd) # 0.1 % 15
sum(ccm_bd$pbstrahler,na.rm=TRUE)/nrow(ccm_bd) #3% 517
sum(!ccm_bd$pbratiosource&!ccm_bd$pbstrahler&!ccm_bd$pb_ratio_dist_mer,na.rm=TRUE)/nrow(ccm_bd) # on garde 79 % des données
# hitograms
layout(matrix(c(1,2), 1, 2, byrow = TRUE))
hist(ratio_dist_mer[ratio_dist_mer<2],100,main="ratio dist. sea")
abline(v=2,col="red")
mtext(paste(round(sum(ccm_bd$pb_ratio_dist_mer,na.rm=TRUE)/nrow(ccm_bd),2)*100,"% out"),col="red")
hist(ratio_sources[ratio_sources<10],100,main="ratio dist source")
abline(v=3,col="red")
mtext(paste(round(sum(ccm_bd$pbratiosource,na.rm=TRUE)/nrow(ccm_bd),3)*100,"% out"),col="red")
# plots
g<-ggplot(ccm_bd)
g+geom_point(aes(x=distance_source,y=distance_source_ccm,colour=pbstrahler))
g+geom_point(aes(distance_mer,cum_len_sea,colour=pb_ratio_dist_mer))+geom_abline(slope=1,colour="red") +geom_abline(slope=1,intercept=1,colour="green") 
#g+geom_point(aes(distance_mer,cum_len_sea,colour=pbratiosource))+geom_abline(slope=1,colour="red")
g+geom_jitter(aes(x=strahler,y=strahlerccm,colour=pbstrahler),alpha=0.3)
g1<-g+geom_point(aes(x=distance_source,y=distance_source_ccm,colour=pbratiosource))+geom_abline(slope=2,colour="red")
ccm_bd1<-ccm_bd[ccm_bd$pbstrahler,]
g1+geom_point(aes(x=distance_source,y=distance_source_ccm),size=2,data=ccm_bd1,pch=1)

The following criteria have been used to select stations.
For distance_sea(ccm) > 100 distance_seaccm/distance_sea_bd_carthage<=2 => we mostly strive to avoid the projection of stations on short tributaries on larger streams from the ccm
Distance_source(ccm)/distance_source(bd_carthage)<=2
ccm_bd$strahlerccm-ccm_bd$strahler<=1
Finally 78% lines kept but this include doubles
source:data/Docs/trac/BDMAP/percentage_out.jpeg source:data/Docs/trac/BDMAP/comp_dist_source.jpeg source:data/Docs/trac/BDMAP/comp_dist_mer.jpeg source:data/Docs/trac/BDMAP/com_strahler.jpeg

5. query to select the final data from bd_map.bdmap_ccm2

select count(*) from bd_map.bdmap_ccm2; --13695 this one is with repeated projection on ccm riversegments within 300 m
select count(*) from bd_map.bdmap_ccm; --8884 this is without selection (first treatment) for the riversegement corresponding to the lowest proj distance
/*
REQUEST WITH SELECTION OF CORRECT CRITERIA FOR DISTANCE SEA, DISTANCE SOURCE, STRAHLER RANK
*/

select * from (
        select b.st_codecsp, 
        b.distance_source as dist_source_bdcar,
        c.distance_source/1000 as dist_source_ccm,
        (c.distance_source/(1000*b.distance_source))<=2 as dist_source_ratio,
        b.distance_mer as dist_sea_bdcar,
        c.cum_len_sea/1000 as dist_sea_ccm,
        NOT((c.cum_len_sea/(1000*b.distance_mer))>2 and c.cum_len_sea>100000) as dist_sea_ratio,
        b.strahler as strahler_bdcar,
        c.strahlerccm as strahler_ccm,
        (c.strahlerccm-b.strahler)<=1 as strahler_diff,
        b.distance as distproj_bdcar,
        c.distance as distproj_ccm,
        id_trhyd,
        gid,
        c.the_geom
        from bd_map.bd_map_bd_carthage b join bd_map.bdmap_ccm2 c on b.st_codecsp=c.st_codecsp
        where b.distance_source >0 -- to avoid division by zero
        and b.distance_mer>0) as sub
WHERE dist_source_ratio is TRUE
AND dist_sea_ratio is TRUE
AND strahler_diff is TRUE; --10783 lines this is with repeated stations but selection has removed ~ 3000 lines
/*
FINAL REQUEST
*/
drop table if exists bd_map.bdmap_ccm_final;
create table bd_map.bdmap_ccm_final as (        
select distinct on(st_codecsp) st_codecsp,
        gid,
        id_trhyd,
        dist_source_bdcar,
        dist_source_ccm,
        dist_sea_bdcar,
        dist_sea_ccm,
        strahler_bdcar,
        strahler_ccm,
        distproj_bdcar,
        min(distproj_ccm) as distproj_ccm,
        the_geom
        from( select *  
                from (
                        select b.st_codecsp, 
                        b.distance_source as dist_source_bdcar,
                        c.distance_source/1000 as dist_source_ccm,
                        (c.distance_source/(1000*b.distance_source))<=2 as dist_source_ratio,
                        b.distance_mer as dist_sea_bdcar,
                        c.cum_len_sea/1000 as dist_sea_ccm,
                        NOT((c.cum_len_sea/(1000*b.distance_mer))>2 and c.cum_len_sea>100000) as dist_sea_ratio,
                        b.strahler as strahler_bdcar,
                        c.strahlerccm as strahler_ccm,
                        (c.strahlerccm-b.strahler)<=1 as strahler_diff,
                        b.distance as distproj_bdcar,
                        c.distance as distproj_ccm,
                        id_trhyd,
                        gid,
                        c.the_geom
                        from bd_map.bd_map_bd_carthage b join bd_map.bdmap_ccm2 c on b.st_codecsp=c.st_codecsp
                        where b.distance_source >0 -- to avoid division by zero
                        and b.distance_mer>0
                ) as sub
                WHERE dist_source_ratio is TRUE
                AND dist_sea_ratio is TRUE
                AND strahler_diff is TRUE
        ) as sub1 --10783 lines
        group by st_codecsp, gid,id_trhyd,dist_source_bdcar,dist_source_ccm,dist_sea_bdcar,dist_sea_ccm,strahler_bdcar,strahler_ccm,distproj_bdcar,     distproj_ccm,
        the_geom
        order by st_codecsp --7036 8884-7036=1848 stations removed
) ;

/*
* ANALYSIS OF WITHDRAWN NUMBERS
*/
select count(*) from bd_map.bd_map_bd_carthage where distance_source=0 ;-- 42
select count(*) from bd_map.bd_map_bd_carthage where distance_mer=0; -- 5
-- voir comment on gère ces cas particuliers par la suite....
select sum(cast(not(dist_sea_ratio) as integer)) as sum_pb_sea,
        sum(cast(not(dist_source_ratio) as integer)) as sum_pb_source,
        sum(cast(not(strahler_diff) as integer)) as sum_pb_strahler
        from (
                select b.st_codecsp, 
                b.distance_source as dist_source_bdcar,
                c.distance_source/1000 as dist_source_ccm,
                (c.distance_source/(1000*b.distance_source))<=2 as dist_source_ratio,
                b.distance_mer as dist_sea_bdcar,
                c.cum_len_sea/1000 as dist_sea_ccm,
                NOT((c.cum_len_sea/(1000*b.distance_mer))>2 and c.cum_len_sea>100000) as dist_sea_ratio,
                b.strahler as strahler_bdcar,
                c.strahlerccm as strahler_ccm,
                (c.strahlerccm-b.strahler)<=1 as strahler_diff,
                b.distance as distproj_bdcar,
                c.distance as distproj_ccm,
                id_trhyd,
                gid,
                c.the_geom
                from bd_map.bd_map_bd_carthage b join bd_map.bdmap_ccm2 c on b.st_codecsp=c.st_codecsp
                where b.distance_source >0 
                and b.distance_mer>0
        ) as sub; -- 15 (R=15), 2234 (R=2279 including zero), 490 (R=517) OK
/*
* TABLE TO SHOW THE RESULTS IN A MAP FOR THOSE THAT WERE NOT SELECTED TOO
*/

drop table if exists bd_map.bdmap_ccm_full;
create table bd_map.bdmap_ccm_full as (
                select b.st_codecsp, 
                b.distance_source as dist_source_bdcar,
                c.distance_source/1000 as dist_source_ccm,
                (c.distance_source/(1000*b.distance_source))<=2 as dist_source_ratio,
                b.distance_mer as dist_sea_bdcar,
                c.cum_len_sea/1000 as dist_sea_ccm,
                NOT((c.cum_len_sea/(1000*b.distance_mer))>2 and c.cum_len_sea>100000) as dist_sea_ratio,
                b.strahler as strahler_bdcar,
                c.strahlerccm as strahler_ccm,
                (c.strahlerccm-b.strahler)<=1 as strahler_diff,
                b.distance as distproj_bdcar,
                c.distance as distproj_ccm,
                id_trhyd,
                gid,
                c.the_geom
                from bd_map.bd_map_bd_carthage b join bd_map.bdmap_ccm2 c on b.st_codecsp=c.st_codecsp
                where b.distance_source >0 
                and b.distance_mer>0
);

There is a problem of distance source too short for bd_carthage this trouble was already seen on the first figure on this page
source:data/Docs/trac/BDMAP/stations_selection_1.jpg When checking however there seems to have been no problems with Cédric first calculation of distance source.