@@ -413,6 +413,66 @@ def test_predicate_accept_in(store, predicate_value, expected):
413413 )
414414
415415
@pytest.mark.parametrize(
    ["predicate_value", "expected"],
    [
        ([0, 4, 1], True),
        ([-2, 44], True),
        ([-3, 0], True),
        ([-1, 10 ** 4], True),
        ([2, 3], True),
        ([-1, 20], True),
        ([-30, -5, 50, 10], True),
        ([-30, -5, 50, np.nan], True),
        ([], True),
    ],
)
def test_predicate_accept_notin(store, predicate_value, expected):
    """Check ``_predicate_accepts`` for ``not in`` predicates on column ``A``.

    A single-column frame holding ``[0, 4, 13, 29]`` is written with
    ``ParquetSerializer(chunk_size=None)`` and the metadata of row group 0 is
    handed to ``_predicate_accepts``. Every parametrized value list is
    expected to be accepted.
    """
    # Column "A" spans min = 0, max = 29.
    frame = pd.DataFrame({"A": [0, 4, 13, 29]})
    predicate = ("A", "not in", predicate_value)

    stored_key = ParquetSerializer(chunk_size=None).store(store, "prefix", frame)
    pq_file = ParquetFile(store.open(stored_key))

    # NOTE(review): presumably the decision is made from the row-group
    # statistics exposed by the metadata -- confirm in _predicate_accepts.
    accepted = _predicate_accepts(
        predicate,
        row_meta=pq_file.metadata.row_group(0),
        arrow_schema=pq_file.schema.to_arrow_schema(),
        parquet_reader=pq_file.reader,
    )
    assert accepted == expected
449+
450+
@pytest.mark.parametrize(
    ["predicate_value", "test_data"],
    [
        ([0], [0, 0]),
        ([0, np.nan], [0, 0, np.nan]),
    ],
)
def test_predicate_accept_notin_excludes(store, predicate_value, test_data):
    """Check that a ``not in`` predicate is rejected when it excludes all data.

    The frame is built from ``test_data``, stored with
    ``ParquetSerializer(chunk_size=None)``, and the metadata of row group 0 is
    passed to ``_predicate_accepts``, which must return a falsy value for each
    parametrized case.
    """
    # In every case the non-NaN values of test_data are all 0, and
    # predicate_value lists each distinct value that occurs in test_data.
    frame = pd.DataFrame({"A": test_data})
    predicate = ("A", "not in", predicate_value)

    stored_key = ParquetSerializer(chunk_size=None).store(store, "prefix", frame)
    pq_file = ParquetFile(store.open(stored_key))

    accepted = _predicate_accepts(
        predicate,
        row_meta=pq_file.metadata.row_group(0),
        arrow_schema=pq_file.schema.to_arrow_schema(),
        parquet_reader=pq_file.reader,
    )
    assert not accepted
474+
475+
416476def test_read_categorical (store ):
417477 df = pd .DataFrame ({"col" : ["a" ]}).astype ({"col" : "category" })
418478
0 commit comments