Astuce .duckdbrc : place LOAD spatial; LOAD httpfs; dans ton fichier ~/.duckdbrc pour que les extensions se chargent automatiquement à chaque session — tu n'auras plus jamais à y penser.
2
Ouvrir un fichier GeoParquet local ou distant
DuckDB reconnaît automatiquement le format GeoParquet
-- Extraction POI sur le plateau de Saclay-- Remplace xmin/xmax/ymin/ymax par ta bounding boxSELECTid,
names.primary AS nom,
categories.primary AS categorie,
confidence,
ST_AsText(geometry) AS wkt
FROMread_parquet(
's3://overturemaps-us-west-2/release/2026-03-18.0/theme=places/type=place/*',
filename=true, hive_partitioning=true
)
WHEREbbox.xmin BETWEEN2.151298AND2.235049ANDbbox.ymin BETWEEN48.706094AND48.722411AND confidence > 0.8ORDER BY confidence DESCLIMIT500;
overture_buildings_bbox.sql
-- Empreintes de bâtiments avec hauteur et nomSELECTid,
names.primary AS nom,
height,
subtype,
class,
ST_Area(ST_GeomFromWKB(geometry)) AS surface_m2,
geometryFROMread_parquet(
's3://overturemaps-us-west-2/release/2026-03-18.0/theme=buildings/type=building/*.parquet'
)
WHEREbbox.xmin > 2.2946ANDbbox.xmax < 2.3920ANDbbox.ymin > 48.8396ANDbbox.ymax < 48.8749AND height IS NOT NULLORDER BY height DESC;
-- Jointure spatiale : points dans des polygones-- (ex : POI dans chaque commune)SELECT
z.nom_zone,
COUNT(p.id) AS nb_poi,
AVG(p.confidence) AS confiance_moy
FROMread_parquet('zones.parquet') z
JOINread_parquet('points.parquet') p
ONST_Within(
ST_GeomFromWKB(p.geometry),
ST_GeomFromWKB(z.geometry)
)
GROUP BY z.nom_zone
ORDER BY nb_poi DESC;
-- Plus proche voisin : trouver le POI le plus proche d'un pointSELECT
names.primary AS nom,
categories.primary AS type,
ST_Distance(
ST_GeomFromWKB(geometry),
ST_Point(2.3370, 48.8606)
) * 111320AS distance_m
FROMread_parquet('poi_paris.parquet')
WHERE
bbox.xmin BETWEEN2.31AND2.37AND bbox.ymin BETWEEN48.84AND48.88ORDER BY distance_m
LIMIT10;
Conversion locale : GeoParquet → GeoJSON en ligne de commande
convert_cli.sh
# Lancer DuckDB en mode CLI avec un script SQL
duckdb -c "LOAD spatial; LOAD httpfs;
COPY (SELECT * FROM read_parquet('data.parquet'))
TO 'output.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON');"# Ou en pipe (utile pour les workflows CI/CD)
echo "LOAD spatial;
COPY (SELECT id, geometry FROM read_parquet('input.parquet'))
TO 'out.fgb' WITH (FORMAT GDAL, DRIVER 'FlatGeobuf');" | duckdb
7
Workflow complet : Overture → analyse → export
-- Stats rapides sur un GeoParquet (remplace psql \d + SELECT count)SELECTCOUNT(*) AS nb_entites,
COUNT(DISTINCT categories.primary) AS nb_categories,
ROUND(AVG(confidence), 3) AS confiance_moy,
MIN(bbox.xmin) AS lon_min,
MAX(bbox.xmax) AS lon_max,
MIN(bbox.ymin) AS lat_min,
MAX(bbox.ymax) AS lat_max
FROMread_parquet('poi_paris.parquet');