-

   rss_rss_hh_new

 - e-mail

 

 -

 LiveInternet.ru:
: 17.03.2011
:
:
: 51

:


PostgreSQL 5

, 01 2017 . 10:28 +

PostgreSQL, , : - B-. GiST.

GiST


GiST generalized search tree. , , b-tree.

? b-tree : , ,  , ( !). , : , ,

GiST. , . , GiST- R- ( , ; ..), RD- .

PostgreSQL : . , , , 蠗 . GiST , : , , . , GiST .



GiST , -. .

( ) , , ( ) (TID). () .

( ) , . , . , GiST B-.

GiST (consistent) , , - .

, ( - ). , , 蠗 .

, , . , ( ), 堗 . . , , , .

: - . ( , , ).

, - , . , , - .

GiST . . , ( , ). . : ( ) ( reindex vacuum full). GiST- .

GiST:
  • ( ) ;
  • ;
  • .


R-


( ). B- , .

R- , , . , : .

R- (, ). , , .

, , , ; , .

, R-; ( airports -, openflights.org).


; .


; .


; , .

:



postgres=# create table points(p point);
CREATE TABLE
postgres=# insert into points(p) values
(point '(1,1)'), (point '(3,2)'), (point '(6,3)'),
(point '(5,5)'), (point '(7,8)'), (point '(8,6)');
INSERT 0 6
postgres=# create index on points using gist(p);
CREATE INDEX

:



, , : , . : p <@ box '(2,1),(6,3)' ( <@ points_ops ):

postgres=# set enable_seqscan = off;
SET
postgres=# explain(costs off) select * from points where p <@ box '(2,1),(7,4)';
QUERY PLAN
----------------------------------------------
Index Only Scan using points_p_idx on points
Index Cond: (p <@ '(7,4),(2,1)'::box)
(2 rows)

(- <@ , - , ) . , , . , ( ) , .



. (2,1)-(7,4) (1,1)-(6,3), (5,5)-(8,8), .



, : (3,2) (6,3).

postgres=# select * from points where p <@ box '(2,1),(7,4)';
p
-------
(3,2)
(6,3)
(2 rows)



pageinspect, , GiST-. ᠗ gevel. ; .

, . -, :

postgres=# select * from gist_stat('airports_coordinates_idx');
gist_stat
------------------------------------------
Number of levels: 4 +
Number of pages: 690 +
Number of leaf pages: 625 +
Number of tuples: 7873 +
Number of invalid tuples: 0 +
Number of leaf tuples: 7184 +
Total size of tuples: 354692 bytes +
Total size of leaf tuples: 323596 bytes +
Total size of index: 5652480 bytes+

(1 row)

, 690 : , .

: 10%.

-, :

postgres=# select * from gist_tree('airports_coordinates_idx');
gist_tree
-----------------------------------------------------------------------------------------
0(l:0) blk: 0 numTuple: 5 free: 7928b(2.84%) rightlink:4294967295 (InvalidBlockNumber) +
1(l:1) blk: 335 numTuple: 15 free: 7488b(8.24%) rightlink:220 (OK) +
1(l:2) blk: 128 numTuple: 9 free: 7752b(5.00%) rightlink:49 (OK) +
1(l:3) blk: 57 numTuple: 12 free: 7620b(6.62%) rightlink:35 (OK) +
2(l:3) blk: 62 numTuple: 9 free: 7752b(5.00%) rightlink:57 (OK) +
3(l:3) blk: 72 numTuple: 7 free: 7840b(3.92%) rightlink:23 (OK) +
4(l:3) blk: 115 numTuple: 17 free: 7400b(9.31%) rightlink:33 (OK) +
...

-, , . : .  box ( ). , :

postgres=# select level, a from gist_print('airports_coordinates_idx')
as t(level int, valid bool, a box) where level = 1;
level | a
-------+-----------------------------------------------------------------------
1 | (47.663586,80.803207),(-39.2938003540039,-90)
1 | (179.951004028,15.6700000762939),(15.2428998947144,-77.9634017944336)
1 | (177.740997314453,73.5178070068359),(15.0664,10.57970047)
1 | (-77.3191986083984,79.9946975708),(-179.876998901,-43.810001373291)
1 | (-39.864200592041,82.5177993774),(-81.254096984863,-64.2382965088)
(5 rows)

, .


, (, <@ p <@ box '(2,1),(7,4)'), , .

⠗ . order by , . :

postgres=# select * from points order by p <-> point '(4,7)' limit 2;
p
-------
(5,5)
(7,8)
(2 rows)


p <-> point '(4,7)' , <->, . : , (4,7). k-NN k-nearest neighbor search.

(distance), (, ꠗ points_ops). (s , o ):

postgres=# select amop.amopopr::regoperator, amop.amoppurpose, amop.amopstrategy
from pg_opclass opc, pg_opfamily opf, pg_am am, pg_amop amop
where opc.opcname = 'point_ops'
and opf.oid = opc.opcfamily
and am.oid = opf.opfmethod
and amop.amopfamily = opc.opcfamily
and am.amname = 'gist'
and amop.amoplefttype = opc.opcintype;
amopopr | amoppurpose | amopstrategy
-------------------+-------------+--------------
<<(point,point) | s | 1
>>(point,point) | s | 5
~=(point,point) | s | 6
<^(point,point) | s | 10
>^(point,point) | s | 11
<->(point,point) | o | 15
<@(point,box) | s | 28
<@(point,polygon) | s | 48
<@(point,circle) | s | 68
(9 rows)

. , , btree; . .

( ) , (- - ), . . . , , . 蠗 .

, ( PostgreSQL btree, ).

: (x1,y1) <-> (x2,y2) . , , . , , .

.



. . (1,1)-(6,3) 4.0, (5,5)-(8,8) 1.0.

. , (5,5)-(8,8) ( ):



, (5,5) (7,8). , , (1,1)-(6,3), 4.0 , .

, ?

postgres=# select * from points order by p <-> point '(4,7)' limit 3;
p
-------
(5,5)
(7,8)
(8,6)
(3 rows)


, (8,6), , ( 4.0 < 4.1).



. (4.0 4.5), ( ), .

GiST , . : RUM ( ), , B- , , .

R-


gist , , ( tsrange). , , .

. :

postgres=# create table reservations(during tsrange);
CREATE TABLE
postgres=# insert into reservations(during) values
('[2016-12-30, 2017-01-09)'),
('[2017-02-23, 2017-02-27)'),
('[2017-04-29, 2017-05-02)');
INSERT 0 3
postgres=# create index on reservations using gist(during);
CREATE INDEX

, , :

postgres=# select * from reservations where during && '[2017-01-01, 2017-04-01)';
during
-----------------------------------------------
["2016-12-30 00:00:00","2017-01-08 00:00:00")
["2017-02-23 00:00:00","2017-02-26 00:00:00")
(2 rows)

postgres=# explain (costs off) select * from reservations where during && '[2017-01-01, 2017-04-01)';
QUERY PLAN
------------------------------------------------------------------------------------
Index Only Scan using reservations_during_idx on reservations
Index Cond: (during && '["2017-01-01 00:00:00","2017-04-01 00:00:00")'::tsrange)
(2 rows)

&& ; , . , .

, , , . b-tree, , :

postgres=# select amop.amopopr::regoperator, amop.amoppurpose, amop.amopstrategy
from pg_opclass opc, pg_opfamily opf, pg_am am, pg_amop amop
where opc.opcname = 'range_ops'
and opf.oid = opc.opcfamily
and am.oid = opf.opfmethod
and amop.amopfamily = opc.opcfamily
and am.amname = 'gist'
and amop.amoplefttype = opc.opcintype;
amopopr | amoppurpose | amopstrategy
-------------------------+-------------+--------------
@>(anyrange,anyelement) | s | 16
<<(anyrange,anyrange) | s | 1
&<(anyrange,anyrange) | s | 2
&&(anyrange,anyrange) | s | 3
&>(anyrange,anyrange) | s | 4
>>(anyrange,anyrange) | s | 5
-|-(anyrange,anyrange) | s | 6
@>(anyrange,anyrange) | s | 7
<@(anyrange,anyrange) | s | 8
=(anyrange,anyrange) | s | 18
(10 rows)

( , btree.)


gevel. gist_print:

postgres=# select level, a from gist_print('reservations_during_idx')
as t(level int, valid bool, a tsrange);
level | a
-------+-----------------------------------------------
1 | ["2016-12-30 00:00:00","2017-01-09 00:00:00")
1 | ["2017-02-23 00:00:00","2017-02-27 00:00:00")
1 | ["2017-04-29 00:00:00","2017-05-02 00:00:00")
(3 rows)



GiST (exclude).

, . , : .

, . , :

  1. 젗 can_exclude (, , btree, gist spgist, gin);
  2. , : a b =
    b a.

(, , - ):

  • btree:
    • =

  • gist spgist:
    • &&
    • ~=
    • -|-


, , : . , B-.

. .

postgres=# alter table reservations add exclude using gist(during with &&);
ALTER TABLE

:

postgres=# insert into reservations(during) values ('[2017-06-10, 2017-06-13)');
INSERT 0 1

:

postgres=# insert into reservations(during) values ('[2017-05-15, 2017-06-15)');
ERROR: conflicting key value violates exclusion constraint "reservations_during_excl"
DETAIL: Key (during)=(["2017-05-15 00:00:00","2017-06-15 00:00:00")) conflicts with existing key (during)=(["2017-06-10 00:00:00","2017-06-13 00:00:00")).


btree_gist


. :

postgres=# alter table reservations add house_no integer default 1;
ALTER TABLE

, . GiST :

postgres=# alter table reservations drop constraint reservations_during_excl;
ALTER TABLE
postgres=# alter table reservations add exclude using gist(during with &&, house_no with =);
ERROR: data type integer has no default operator class for access method "gist"
HINT: You must specify an operator class for the index or define a default operator class for the data type.

btree_gist, GiST- , B-. , GiST , , , ?

postgres=# create extension btree_gist;
CREATE EXTENSION
postgres=# alter table reservations add exclude using gist(during with &&, house_no with =);
ALTER TABLE

- :

postgres=# insert into reservations(during, house_no) values ('[2017-05-15, 2017-06-15)', 1);
ERROR: conflicting key value violates exclusion constraint "reservations_during_house_no_excl"

:

postgres=# insert into reservations(during, house_no) values ('[2017-05-15, 2017-06-15)', 2);
INSERT 0 1

, GiST - , , , B- . , GiST .

RD-



PostgreSQL ( , ).

, , . ( , , .)

tsvector, . , , . , :

postgres=# set default_text_search_config = russian;
SET
postgres=# select to_tsvector(' , . , , .');
to_tsvector
--------------------------------------------------------------------
'':3,5 '':13 '':2 '':9 '':11 '':4 '':7
(1 row)

, ( -) (, ), , , , . , , .

젗 tsquery. , , , : &, |, !. .

postgres=# select to_tsquery(' & ( | )');
to_tsquery
----------------------------------
'' & ( '' | '' )
(1 row)

- @@.

postgres=# select to_tsvector(' , .') @@ to_tsquery(' & ( | )');
?column?
----------
t
(1 row)

postgres=# select to_tsvector(' , .') @@ to_tsquery(' & ( | )');
?column?
----------
f
(1 row)

. , GIN.

RD-


, , -, tsvector ( ), -, .  GiST.

postgres=# create table ts(doc text, doc_tsv tsvector);
CREATE TABLE
postgres=# create index on ts using gist(doc_tsv);
CREATE INDEX
postgres=# insert into ts(doc) values
(' '), (' '), (', , '),
(' '), (' '), (', , '),
(' '), (' '), (', , ');
INSERT 0 9
postgres=# update ts set doc_tsv = to_tsvector(doc);
UPDATE 9

( tsvector), , .

postgres=# select * from ts;
doc | doc_tsv
-------------------------+--------------------------------
| '':3 '':2 '':4
| '':3 '':2 '':4
, , | '':1,2 '':3
| '':2 '':3 '':1
| '':3 '':2 '':1
, , | '':3 '':1,2
| '':3 '':2
| '':1 '':2 '':3
, , | '':3 '':1,2
(9 rows)

? R- , . ⠗ RD- (RD Russian Doll, ). , .

RD- ,  , .

. , ᠗ . :



, , doc_tsv @@ to_tsquery('') , '':



. , TOAST, . , 젗 , .

, . , , 堗 . , .

: , , . - ( , -, ).

.

, :

1000000
0001000
0000010
0010000
0000100
0100000
0000100
0000001
0000010
0000010
0010000

:

0011010
0010110
, , 0110000
0011100
0010100
, , 0110000
0000011
1001010
, , 0100010


:



: , . : - .

doc_tsv @@ to_tsquery(''). , : 0010000. , :



: ,  , (, ) . , . , ( ) , .

, , : , , , , ( 0110000). tsvector, . , , . .

124 , , . . - , : tsvector , ( 1/16 , 8).


, , pgsql-hackers. 356125 , , :

fts=# select * from mail_messages order by sent limit 1;
-[ RECORD 1 ]------------------------------------------------------------------------
id | 1572389
parent_id | 1562808
sent | 1997-06-24 11:31:09
subject | Re: [HACKERS] Array bug is still there....
author | "Thomas G. Lockhart" <thomas.lockhart@jpl.nasa.gov>
body_plain | Andrew Martin wrote: +
| > Just run the regression tests on 6.1 and as I suspected the array bug +
| > is still there. The regression test passes because the expected output+
| > has been fixed to the *wrong* output. +
| +
| OK, I think I understand the current array behavior, which is apparently+
| different than the behavior for v1.0x. +
...

tsvector . (, ), , , .

fts=# alter table mail_messages add column tsv tsvector;
ALTER TABLE
fts=# update mail_messages
set tsv = to_tsvector(subject||' '||author||' '||body_plain);
NOTICE: word is too long to be indexed
DETAIL: Words longer than 2047 characters are ignored.
...
UPDATE 356125
fts=# create index on mail_messages using gist(tsv);
CREATE INDEX

, , - . :

fts=# explain (analyze, costs off)
select * from mail_messages where tsv @@ to_tsquery('magic & value');
QUERY PLAN
----------------------------------------------------------
Index Scan using mail_messages_tsv_idx on mail_messages
(actual time=0.998..416.335 rows=898 loops=1)
Index Cond: (tsv @@ to_tsquery('magic & value'::text))
Rows Removed by Index Recheck: 7859
Planning time: 0.203 ms
Execution time: 416.492 ms
(5 rows)

, 898 , 7859 , . .


gevel:

fts=# select level, a from gist_print('mail_messages_tsv_idx') as t(level int, valid bool, a gtsvector) where a is not null;
level | a
-------+-------------------------------
1 | 992 true bits, 0 false bits
2 | 988 true bits, 4 false bits
3 | 573 true bits, 419 false bits
4 | 65 unique words
4 | 107 unique words
4 | 64 unique words
4 | 42 unique words
...

gtsvector, , , tsvector. , (unique words), (true) (false) .

, ( ).


gist ( ):

amname | name | pg_indexam_has_property
--------+---------------+-------------------------
gist | can_order | f
gist | can_unique | f
gist | can_multi_col | t
gist | can_exclude | t

. , , .

:

name | pg_index_has_property
---------------+-----------------------
clusterable | t
index_scan | t
bitmap_scan | t
backward_scan | f

, , . :

name | pg_index_column_has_property
--------------------+------------------------------
asc | f
desc | f
nulls_first | f
nulls_last | f
orderable | f
search_array | f
search_nulls | t

( ; ; .)

, distance_orderable returnable, . , :

name | pg_index_column_has_property
--------------------+------------------------------
distance_orderable | t
returnable | t

, . 堗 . , , , , .

:

name | pg_index_column_has_property
--------------------+------------------------------
distance_orderable | f
returnable | t

, .

:

name | pg_index_column_has_property
--------------------+------------------------------
distance_orderable | f
returnable | f

, . , , tsvector : , , .


, GiST, ( ), .

IP- inet, :
  • cube cube . , , GiST: R- .
  • seg seg , , GiST- (R-).
  • intarray GiST-. : gist__int_ops (RD- ) gist__bigint_ops ( RD-). , 頗 .
  • ltree ltree GiST- (RD-).
  • pg_trgm gist_trgm_ops . 젗 GIN.

.
Original source: habrahabr.ru (comments, light).

https://habrahabr.ru/post/333878/

:  

: [1] []
 

:
: 

: ( )

:

  URL