#/bin/sh # # This script is aimed at restoring postgis data # from a dumpfile produced by pg_dump -Fc # # Basically it will restore all but things created by # the given postgis.sql. # Before restoring, it will create and postgis-enable # the target db. # # A particular attention must be given to the spatial_ref_sys # and geometry_columns tables which are created and populated # from the dump, not the postgis.sql file. When the new installation # is agains pgsql7.5+ and dump from pre7.5 this script should probably # drop statistic fields from that table.... currently not done. # Also, when upgrading to pgsq8.1+ (from <8.1) the oid column of # geometry_columns will be dropped, while it is needed for # postgis opearations. # # Issues: # o Some obsoleted functions would not be present in the # postgis.sql, but will be found in the dump. Currently # some are skipped, but some depend on the pg version # so will issue an ERROR due to unavailability of # corresponding C function in postgis lib. # # o This script could do less then it does, to allow users # to further modify edited dump before feeding it to the # restoring side. # # # Tested on: # # pg_dump-734/pg734 => pg_restore-743/pg743 # pg_dump-743/pg734 => pg_restore-743/pg743 # pg_dump-743/pg743 => pg_restore-743/pg743 # pg_dump-734/pg734 => pg_restore-800/pg800 # pg_dump-743/pg734 => pg_restore-800/pg800 # pg_dump-743/pg743 => pg_restore-800/pg800 # pg_dump-800/pg800 => pg_restore-800/pg800 # eval "exec perl -w $0 $@" if (0); use strict; (@ARGV >= 3) || die "Usage: postgis_restore.pl []\nRestore a custom dump (pg_dump -Fc) of a postgis enabled database.\n"; my $DEBUG=1; my %aggs = (); my %casts = (); my %funcs = (); my %types = (); my %opclass = (); my %ops = (); # This are old postgis functions which might # still be in a dump my %obsoleted_function = ( 'linefromtext', 1, 'linestringfromtext', 1, 'mlinefromtext', 1, 'multilinestringfromtext', 1, 'mpolyfromtext', 1, 'multipolygonfromtext', 1, 'polyfromtext', 1, 'polygonfromtext', 1, 'pointfromtext', 1, 'mpointfromtext', 1, 'multipointfromtext', 1, 'geomcollfromtext', 1, 'geometryfromtext', 1, 'geomfromtext', 1, 'pointfromwkb(geometry, integer)', 1, 'pointfromwkb(geometry)', 1, 'linefromwkb(geometry, integer)', 1, 'linefromwkb(geometry)', 1, 'linestringfromwkb(geometry, integer)', 1, 'linestringfromwkb(geometry)', 1, 'polyfromwkb(geometry, integer)', 1, 'polyfromwkb(geometry)', 1, 'polygonfromwkb(geometry, integer)', 1, 'polygonfromwkb(geometry)', 1, 'mpointfromwkb(geometry, integer)', 1, 'mpointfromwkb(geometry)', 1, 'multipointfromwkb(geometry, integer)', 1, 'multipointfromwkb(geometry)', 1, 'multilinefromwkb(geometry, integer)', 1, 'multilinefromwkb(geometry)', 1, 'mlinefromwkb(geometry, integer)', 1, 'mlinefromwkb(geometry)', 1, 'mpolyfromwkb(geometry, integer)', 1, 'mpolyfromwkb(geometry)', 1, 'multipolyfromwkb(geometry, integer)', 1, 'multipolyfromwkb(geometry)', 1, 'geomcollfromwkb(geometry, integer)', 1, 'geomcollfromwkb(geometry)', 1, 'wkb_in', 1, 'wkb_out', 1, 'wkb_recv', 1, 'wkb_send', 1, 'postgisgistcostestimate', 1, 'ggeometry_compress', 1, 'ggeometry_picksplit', 1, 'gbox_picksplit', 1, 'ggeometry_union', 1, 'gbox_union', 1, 'ggeometry_same', 1, 'gbox_same', 1, 'rtree_decompress', 1, 'ggeometry_penalty', 1, 'gbox_penalty', 1, 'geometry_union(geometry, geometry)', 1, 'geometry_inter(geometry, geometry)', 1, 'geometry_size', 1, 'ggeometry_consistent', 1, 'create_histogram2d(box3d, integer)', 1, 'estimate_histogram2d(histogram2d, box)', 1, 'xmin(box2d)', 1, 'ymin(box2d)', 1, 'xmax(box2d)', 1, 'ymax(box2d)', 1, 'optimistic_overlap', 1, 'unite_finalfunc', 1, 'numb_sub_objs(geometry)', 1, 'truly_inside(geometry, geometry)', 1 ); # This are old postgis operators which might # still be in a dump my %obsoleted_ops = ( '>>,box2d,box2d', 1, '<<,box2d,box2d', 1, '&>,box2d,box2d', 1, '&<,box2d,box2d', 1, '&&,box2d,box2d', 1, '~=,box2d,box2d', 1, '~,box2d,box2d', 1, '@,box2d,box2d', 1 ); my $postgissql = $ARGV[0]; shift(@ARGV); my $dbname = $ARGV[0]; shift(@ARGV); my $dump = $ARGV[0]; shift(@ARGV); my $createdb_opt = ''; my $dumplist=$dump.".list"; my $dumpascii=$dump.".ascii"; $createdb_opt = join(' ', @ARGV) if @ARGV; print "postgis.sql is $postgissql\n"; print "dbname is $dbname\n"; print "dumpfile is $dump\n"; print "database creation options: $createdb_opt\n" if $createdb_opt; # # Canonicalize type names (they change between dump versions). # Here we also strip schema qualification # sub canonicalize_typename { my $arg=shift; # Lower case $arg = lc($arg); # Trim whitespaces $arg =~ s/^ *//; $arg =~ s/ *$//; # Strip schema qualification #$arg =~ s/^public.//; $arg =~ s/^.*\.//; # Handle type name changes if ( $arg eq 'opaque' ) { $arg = 'internal'; } elsif ( $arg eq 'boolean' ) { $arg = 'bool'; } elsif ( $arg eq 'oldgeometry' ) { $arg = 'geometry'; } return $arg; } # # Scan postgis.sql # print "Scanning $postgissql\n"; open( INPUT, $postgissql ) || die "Couldn't open file: $postgissql\n"; while( my $line = ) { $line =~ s/[\r\n]//g; #print "LINE: $line\n"; next if $line =~ /^ *--/; if ($line =~ /^ *create (or replace)? function ([^ ]*) *\((.*)\)/i) { my $name = lc($2); my @args = split(",", $3); my $geomfound = 0; for (my $i=0; $i<@args; $i++) { my $arg = lc($args[$i]); #print "ARG1: [$arg]\n"; $arg =~ s/^ *//; $arg =~ s/ *$//; #print "ARG2: [$arg]\n"; if ( $arg =~ /^int[48]?$/ ) { $args[$i] = 'integer'; next; } if ( $arg eq 'float4' ) { $args[$i] = 'real'; next; } if ( $arg eq 'float8' ) { $args[$i] = 'double precision'; next; } if ( $arg eq 'varchar' ) { $args[$i] = 'character varying'; next; } if ( $arg eq 'boolean' ) { $args[$i] = 'bool'; next; } if ( $arg eq 'opaque' ) { $args[$i] = 'internal'; next; } $args[$i] = $arg; $geomfound++ if ( $arg eq 'oldgeometry' ); } my $id = $name."(".join(", ", @args).")"; $funcs{$id} = 1; print "SQLFUNC: $id\n" if $DEBUG; if ( $geomfound ) { for (my $i=0; $i<@args; $i++) { my $arg = $args[$i]; $arg = 'geometry' if ($arg eq 'oldgeometry'); $args[$i] = $arg; } my $id = $name."(".join(", ", @args).")"; $funcs{$id} = 1; print "SQLFUNC: $id\n" if $DEBUG; } next; } if ($line =~ /^create type +([^ ]+)/i) { my $type = $1; $types{$type} = 1; print "SQLTYPE $type\n" if $DEBUG; if ( $type eq 'oldgeometry' ) { $type = 'geometry'; $types{$type} = 1; print "SQLTYPE $type\n" if $DEBUG; } next; } if ($line =~ /^create aggregate *([^ ]*) *\(/i) { my $name = lc($1); $name =~ s/^public.//; my $type = undef; while( my $subline = ) { if ( $subline =~ /basetype .* ([^, ]*)/ ) { $type = $1; last; } last if $subline =~ /;[\t ]*$/; } if ( ! defined($type) ) { print "Could not find base type for aggregate $name\n"; print "($line)\n"; exit 1; } my $id = $name.'('.$type.')'; print "SQLAGG $id\n" if $DEBUG; $aggs{$id} = 1; if ( $type eq 'oldgeometry' ) { $type = 'geometry'; my $id = $name.'('.$type.')'; $aggs{$id} = 1; print "SQLAGG $id\n" if $DEBUG; } next; } # CAST if ($line =~ /create cast *\( *([^ ]*) *as *([^ )]*) *\) *with function *([^ ]*) *\(([^ ]*) *\)/i) { my $from = canonicalize_typename($1); my $to = canonicalize_typename($2); my $funcname = canonicalize_typename($3); my $funcarg = canonicalize_typename($4); my $id = $funcarg.'.'.$funcname; $casts{$id} = 1; print "SQLFNCAST $id\n" if $DEBUG; $id = $from.','.$to; $casts{$id} = 1; print "SQLCAST $id\n" if $DEBUG; next; } # OPERATOR CLASS if ($line =~ /create operator class *([^ ]*)/i) { my $id = lc($1); print "SQLOPCLASS $id\n" if $DEBUG; $opclass{$id} = 1; next; } # OPERATOR if ($line =~ /create operator *([^ ]*)/i) { my $name = ($1); my $larg = undef; my $rarg = undef; while( my $subline = ) { last if $subline =~ /;[\t ]*$/; if ( $subline =~ /leftarg *= *([^ ,]*)/i ) { $larg=lc($1); } if ( $subline =~ /rightarg *= *([^ ,]*)/i ) { $rarg=lc($1); } } my $id = $name.','.$larg.','.$rarg; print "SQLOP $id\n" if $DEBUG; $ops{$id} = 1; if ( $larg eq 'oldgeometry' || $rarg eq 'oldgeometry' ) { $larg = 'geometry' if $larg eq 'oldgeometry'; $rarg = 'geometry' if $rarg eq 'oldgeometry'; my $id = $name.','.$larg.','.$rarg; print "SQLOP $id\n" if $DEBUG; $ops{$id} = 1; } next; } } close( INPUT ); #exit; # # Scan dump list # print "Scanning $dump list\n"; open( OUTPUT, ">$dumplist") || die "Can't write to ".$dump.".list\n"; open( INPUT, "pg_restore -l $dump |") || die "Couldn't run pg_restore -l $dump\n"; while( my $line = ) { next if $line =~ /^;/; next if $line =~ /^ *--/; if ($line =~ / FUNCTION/) { my $funcname; my @args; #print "FUNCTION: [$line]\n"; if ($line =~ / FUNCTION *([^ ]*) *\(([^)]*)\)/) { #print " matched <800\n"; $funcname = $1; @args = split(",", $2); } elsif ($line =~ / FUNCTION *([^ ]+) *([^ ]+) *\(([^)]*)\)/) { #print " matched 800\n"; $funcname = $2; @args = split(",", $3); } else { print " unknown FUNCTION match\n"; } $funcname =~ s/^"//; $funcname =~ s/"$//; #print " FUNCNAME: [$funcname]\n"; #print " ARGS: [".@args."]\n"; my $wkbinvolved = 0; for (my $i=0; $i<@args; $i++) { my $arg = canonicalize_typename($args[$i]); $args[$i] = $arg; $wkbinvolved++ if ( $arg eq 'wkb' ); } my $args = join(', ', @args); #print "ARGS SCALAR: [$args]\n"; my $id = $funcname."(".$args.")"; #print "ID: [$id]\n"; # WKB type is obsoleted if ( $wkbinvolved ) { print "SKIPPING FUNC $id\n" if $DEBUG; next; } if ( $funcname eq 'plpgsql_call_handler' ) { print "SKIPPING FUNC $id\n" if $DEBUG; next; } if ( $funcname eq 'plpgsql_validator' ) { print "SKIPPING FUNC $id\n" if $DEBUG; next; } if ( $obsoleted_function{$funcname} || $obsoleted_function{$id} ) { print "SKIPPING OBSOLETED FUNC $id\n" if $DEBUG; next; } if ( $funcs{$id} ) { print "SKIPPING PGIS FUNC $id\n" if $DEBUG; next; } print "KEEPING FUNCTION: [$id]\n" if $DEBUG; #next; } elsif ($line =~ / AGGREGATE ([^ ]* )?([^ ]*)\((.*)\)/) { my $name = $2; my @args = split(",", $3); for (my $i=0; $i<@args; $i++) { $args[$i] = canonicalize_typename($args[$i]); } my $args = join(', ', @args); my $id = $name."(".$args.")"; if ( $aggs{$id} ) { print "SKIPPING PGIS AGG $id\n" if $DEBUG; next; } # This is an old postgis aggregate if ( $name eq 'fastunion' ) { print "SKIPPING old PGIS AGG $id\n" if $DEBUG; next; } # This is an old postgis aggregate if ( $name eq 'mem_collect' ) { print "SKIPPING old PGIS AGG $id\n" if $DEBUG; next; } print "KEEPING AGGREGATE [$id]\n" if $DEBUG; #next; } elsif ($line =~ / TYPE ([^ ]+ )?([^ ]*) .*/) { my $type = canonicalize_typename($2); if ( $type eq 'wkb' ) { print "SKIPPING PGIS TYPE $type\n" if $DEBUG; next; } if ( $types{$type} ) { print "SKIPPING PGIS TYPE $type\n" if $DEBUG; next; } print "KEEPING TYPE [$type]\n" if $DEBUG; #next; } elsif ($line =~ / PROCEDURAL LANGUAGE (public )?plpgsql/) { print "SKIPPING PROCLANG plpgsql\n" if $DEBUG; next; } # spatial_ref_sys and geometry_columns elsif ($line =~ / TABLE geometry_columns/) { #print "SKIPPING geometry_columns schema\n" if $DEBUG; #next; } elsif ($line =~ / TABLE spatial_ref_sys/) { #print "SKIPPING spatial_ref_sys schema\n" if $DEBUG; #next; } # # pg_restore-7.4: # 354; 11038762 OPERATOR CLASS btree_geometry_ops strk # # pg_restore-8.0: # 354; 0 11038762 OPERATOR CLASS public btree_geometry_ops strk # elsif ($line =~ / OPERATOR CLASS +([^ ]+ )?([^ ]+) ([^ ]+)/) { my $id = lc($2); if ( $opclass{$id} ) { print "SKIPPING PGIS OPCLASS $id\n" if $DEBUG; next; } print "KEEPING OPCLASS [$id]\n" if $DEBUG; } # casts were implicit in PG72 elsif ($line =~ / CAST /) { my $arg1=undef; my $arg2=undef; # # CAST def by pg_restore 80,81 on pg_dump 73 # # 734_800; 0 00000 CAST public box2d (public.box3d) # 734_810; 0 00000 CAST public box2d (public.box3d) # if ($line =~ / CAST *([^ ]+) ([^ ]+) *\( *([^ )]+) *\)/) { $arg1 = canonicalize_typename($3); $arg2 = canonicalize_typename($2); } # # CAST def by pg_restore 73,74 on pg_dump 73 # # 734_743; 00000 CAST box2d (public.box3d) # 734_734; 00000 CAST box2d (public.box3d) # elsif ($line =~ / CAST *([^ ]*) *\( *([^ )]*) *\)/) { $arg1 = canonicalize_typename($2); $arg2 = canonicalize_typename($1); } # # CAST def by pg_restore 81 on pg_dump 81 # # 810_810; 0000 00000 CAST pg_catalog CAST (boolean AS text) # elsif ($line =~ / CAST [^ ]* CAST \(([^ ]*) AS ([^ )]*)\)/) { $arg1 = canonicalize_typename($1); $arg2 = canonicalize_typename($2); } if (defined($arg1) && defined($arg2)) { my $id = $arg1.",".$arg2; if ( $casts{$id} ) { print "SKIPPING PGIS CAST $id\n" if $DEBUG; next; } if ($arg1 eq 'wkb' || $arg2 eq 'wkb') { print "SKIPPING PGIS CAST $id\n" if $DEBUG; next; } print "KEEPING CAST $id (see CAST)\n" if $DEBUG; } else { print "KEEPING CAST (unknown def): $line\n"; } } # CAST print OUTPUT $line; # print "UNANDLED: $line" } close( INPUT ); close(OUTPUT); print "Producing ascii dump $dumpascii\n"; open( INPUT, "pg_restore -L $dumplist $dump |") || die "Can't run pg_restore\n"; open( OUTPUT, ">$dumpascii") || die "Can't write to $dumpascii\n"; while( my $line = ) { next if $line =~ /^ *--/; if ( $line =~ /^SET search_path/ ) { $line =~ s/; *$/, public;/; } elsif ( $line =~ /OPERATOR CLASS /) { } elsif ( $line =~ /CREATE OPERATOR *([^ ,]*)/) { my $name = canonicalize_typename($1); my $larg = undef; my $rarg = undef; my @sublines = ($line); while( my $subline = ) { push(@sublines, $subline); last if $subline =~ /;[\t ]*$/; if ( $subline =~ /leftarg *= *([^ ,]*)/i ) { $larg=canonicalize_typename($1); } if ( $subline =~ /rightarg *= *([^ ,]*)/i ) { $rarg=canonicalize_typename($1); } } my $id = $name.','.$larg.','.$rarg; if ( $obsoleted_ops{$id} ) { print "SKIPPING OBSOLETED FUNC $id\n" if $DEBUG; next; } if ( $ops{$id} ) { print "SKIPPING PGIS OP $id\n" if $DEBUG; next; } print "KEEPING OP $id\n" if $DEBUG; print OUTPUT @sublines; next; } print OUTPUT $line; # TODO: # skip postgis operator, checking for basetype # when implemented operators skip must be disabled # in the first scan of ToC } close(INPUT) || die "pg_restore call failed\n"; close(OUTPUT); #exit(1); # # Create the new db and install plpgsql language # print "Creating db ($dbname)\n"; `createdb $dbname $createdb_opt`; die "Database creation failed\n" if ($?); print "Adding plpgsql\n"; `createlang plpgsql $dbname`; # # Open a pipe to the SQL monitor # open( PSQL, "| psql -a $dbname") || die "Can't run psql\n"; # # Source new postgis.sql # print "Sourcing $postgissql\n"; open(INPUT, "<$postgissql") || die "Can't read $postgissql\n"; while() { print PSQL; } close(INPUT); # # Drop geometry_columns and spatial_ref_sys # (we want version from the dump) # print "Dropping geometry_columns and spatial_ref_sys\n"; print PSQL "DROP TABLE geometry_columns;"; print PSQL "DROP TABLE spatial_ref_sys;"; #print "Now source $dumpascii manually\n"; #exit(1); # # Source modified ascii dump # print "Restoring ascii dump $dumpascii\n"; open(INPUT, "<$dumpascii") || die "Can't read $postgissql\n"; while() { print PSQL; } close(INPUT); close(PSQL) || die "psql run failed\n"