Useful queries on Hive Metastore

# Table counts by Table type ( Managed VS External) in the cluster.
SELECT
    TBL_TYPE, COUNT(1) AS 'Count'
FROM
    hive.TBLS
GROUP BY TBL_TYPE
ORDER BY 2 DESC;

# Table counts by Table type grouped with  Owner id
SELECT
    owner, TBL_TYPE, COUNT(1) AS 'Count'
FROM
    hive.TBLS
GROUP BY owner , TBL_TYPE
ORDER BY 3 DESC;

#TABLE's key stats like Num of files , Num of Rows, Size of the table, Statistics ,Last DDL time
SELECT
    DB.NAME AS 'Database',
    TB.TBL_NAME AS 'Table Name',
    GROUP_CONCAT(IF(TP.PARAM_KEY = 'COLUMN_STATS_ACCURATE',
            TP.PARAM_VALUE,
            NULL)) AS 'Basic Stats',
    GROUP_CONCAT(IF(TP.PARAM_KEY = 'numFiles',
            TP.PARAM_VALUE,
            NULL)) AS 'Num_Files#',
    GROUP_CONCAT(IF(TP.PARAM_KEY = 'numRows',
            TP.PARAM_VALUE,
            NULL)) AS 'Num_Rows#',
    GROUP_CONCAT(IF(TP.PARAM_KEY = 'rawDataSize',
            ROUND((TP.PARAM_VALUE / 1024 / 1024 / 1024), 2),
            NULL)) AS 'Data_Size_GB#',
    GROUP_CONCAT(IF(TP.PARAM_KEY = 'transient_lastDdlTime',
            FROM_UNIXTIME(TP.PARAM_VALUE),
            NULL)) AS 'Last DDL Time'
FROM
    hive.TABLE_PARAMS TP,
    hive.TBLS TB,
    hive.DBS DB
WHERE
    TP.TBL_ID = TB.TBL_ID
        AND TB.DB_ID = DB.DB_ID
        AND TP.PARAM_KEY IN ('COLUMN_STATS_ACCURATE' , 'numFiles',
        'numRows',
        'rawDataSize',
        'transient_lastDdlTime')
GROUP BY DB.NAME , TB.TBL_NAME
ORDER BY 4 DESC;


#To collect HDFS Storage path for each table and Numer of partitions and Num_Buckets
 SELECT
    db.NAME AS 'Database',
    tb.TBL_NAME AS 'Table Name',
    storage.LOCATION AS 'HDFS Storage Path',
    COUNT(storage.NUM_BUCKETS) AS 'Num_Buckets'
FROM
    hive.SDS storage,
    hive.DBS db,
    hive.TBLS tb
WHERE
    db.DB_ID = tb.DB_ID
        AND storage.CD_ID = tb.TBL_ID
GROUP BY db.NAME , tb.TBL_NAME
ORDER BY COUNT(storage.NUM_BUCKETS) DESC;

No comments:

Post a Comment

Boost Your Download Speed with lftp Segmentation

Looking for a faster way to download files via sftp to a Linux machine? Try using "lftp" instead. This tool offers segmented downl...

Other relevant topics