HIVE 创建分区表 并利用Python UDF加解密
目录
总结
Python写UDF没Java方便,Python的UDF需要写一大堆类似“TRANSFORM……USING……AS……”的语句,而Java解决依赖库之后可以封装成函数。但是JAVA依赖各种头疼,各有利弊。如果查询HIVE引擎使用dropbox的PyHive,其实这里Python可以利用字符串再次格式化封装二次解析解决这个”TRANSFORM……USING……AS……”的问题。
HIVE创建数据表
创建数据库
1 |
create database test; |
创建表,并以par_date(日期)作为分区字段
1 2 3 4 5 6 |
create table test.cppla ( id int, name string ) partitioned by (par_date string) row format delimited fields terminated by','; |
插入数据
1 2 |
insert into test.cppla partition(par_date='2019-10-14') values(1, 'zhangsan'); insert into test.cppla partition(par_date='2019-10-15') values(2, 'lisi'); |
增加手机号加密列
1 |
ALTER TABLE test.cppla ADD COLUMNS (phone STRING COMMENT 'encrypt phone string'); |
插入手机号加密数据
1 2 |
insert into test.cppla partition(par_date='2019-11-06') values(8, 'whoami', 'AavnaU/kM0niJgrqPSGG2HAAA54FAYtd9s4%2BeJrRPYSEvsdLBKRaSMGy4HjN4t9Bm7UV/qLNnSlP2hmPxL5nTzfxnYcd1t0rJHopanPCnyC7jViAYXHyLRUNsoNRnh1wBxZCSC%2B7GA3xc46q2ecjYO0mqf3/ew3v2c/zpmYneC9ruKD5aCSXirWkVH2ggM4qKMBY7K92Gii4GhTStgljjX%2Be6KIQrgxdAdbGYkJob%2BVYzyEENmDkcd970/cpO5S7%2BXW9ud3bqV6Im6A5bWjELec6H8BNcVo4ACTWy28kfRS/Rf2s2FOqpN70cjs2T/Wi2f8p6deI8d8se%2BWoKoKk1%2Bfn5aJ1CXE%2BU9PrLqUH/beLWMfcxwEeaNprLhWt0cgir7BQG4xLf%2BJrINrEcx44EnK9SjWCg0gZlW9XFQx9h/5ZwuaXtUx1Ie6570XBJ8blnrW4Fm2eZNSZQAI4Nkcpi2PG2x3r6iRChL9O9PDmPtjmvl/CwLH1RkHe/03jX5dSE0siIJV8UgPjYt8G0aW0vxfYGtyqRcLrt7pHRqAc4ouo/uRu4A68gbcSntkrX27hETfXz5WAgYMIgQ1Ph70Djx48MT0LmL7nbknCH3kLdCdjSBPfViEPGO5haDxWkm0uNt2IwgDgHhtnmMsHhEhCV8vTTZzGmJXNKEtxzIl4ZkU%3D'); insert into test.cppla partition(par_date='2019-11-06') values(9, 'cppla', 'e6uYTyEI2pdR%2BkBAfFoFQMlGiBRWb/wHlVUYLiPBDDdnD5Er%2B5NSJacW7w%2BrEcW/qdcNiwuoHNihBJD7Db9XCEy0UsbuamSD0lbZZ9T4Wun6JKRU9HxnHBp0xNMmeeTICo5wGJW2cpHFVCdQxRkzdb/8SRPzz40uOHzaIkz7vgwWs2nUbj5oOms0Zpq%2Bxp2Yhu3KIRECroPBDfJmThmYrPn0wpF4J9S/%2BQqfhYVA7ae0D/n32i1FmROkx1yXrPtzRVjMmxdNRhKo/eOM81vwGwq40fC1zGDun6h%2BD///DI9X0yR0A7aKTdFupmSLfBf3GGfjlm6NdnR7YWFdf2gBer00uHGiRDJpd63THZvhsG3fgI1VddsXM1DiDMr8LB1KizGtB3JVMmTaw2YCC6zxhASHirSJ0yLb2qytV%2BQ6MHWyXUscnoGbBU%2Bwj5lVJ2PoXroGz0hLMbRvoqFIl1yyxPft4zxTfO6eLyE1ONnZjXSxOvXdIQjkKoRtLfZNL9JJiZtD4NWtN59akeFS1Dp/cPb6HnREn40R9DlIFRjX33oOmRyzJSvQ1Hhu7xxki95dfJujBnDJzdzD0yPk9BxvKOQ5EHS4vT6HNQJAn%2Bg6VBEL/gVhd4S30AUQCCKspOGfKyAqakZ/vfSFi/lUUqncWP6n2b5OvN316Xep0cL/Vvo%3D'); |
查看test.cppla表数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
hive> select id, name, phone from test.cppla; OK 1 zhangsan NULL 2 lisi NULL 2 lisi NULL NULL NULL NULL 1 zhangsan NULL 1 张三 NULL 1 里斯 NULL 3 张三 NULL 8 whoami AavnaU/kM0niJgrqPSGG2HAAA54FAYtd9s4%2BeJrRPYSEvsdLBKRaSMGy4HjN4t9Bm7UV/qLNnSlP2hmPxL5nTzfxnYcd1t0rJHopanPCnyC7jViAYXHyLRUNsoNRnh1wBxZCSC%2B7GA3xc46q2ecjYO0mqf3/ew3v2c/zpmYneC9ruKD5aCSXirWkVH2ggM4qKMBY7K92Gii4GhTStgljjX%2Be6KIQrgxdAdbGYkJob%2BVYzyEENmDkcd970/cpO5S7%2BXW9ud3bqV6Im6A5bWjELec6H8BNcVo4ACTWy28kfRS/Rf2s2FOqpN70cjs2T/Wi2f8p6deI8d8se%2BWoKoKk1%2Bfn5aJ1CXE%2BU9PrLqUH/beLWMfcxwEeaNprLhWt0cgir7BQG4xLf%2BJrINrEcx44EnK9SjWCg0gZlW9XFQx9h/5ZwuaXtUx1Ie6570XBJ8blnrW4Fm2eZNSZQAI4Nkcpi2PG2x3r6iRChL9O9PDmPtjmvl/CwLH1RkHe/03jX5dSE0siIJV8UgPjYt8G0aW0vxfYGtyqRcLrt7pHRqAc4ouo/uRu4A68gbcSntkrX27hETfXz5WAgYMIgQ1Ph70Djx48MT0LmL7nbknCH3kLdCdjSBPfViEPGO5haDxWkm0uNt2IwgDgHhtnmMsHhEhCV8vTTZzGmJXNKEtxzIl4ZkU%3D 9 cppla e6uYTyEI2pdR%2BkBAfFoFQMlGiBRWb/wHlVUYLiPBDDdnD5Er%2B5NSJacW7w%2BrEcW/qdcNiwuoHNihBJD7Db9XCEy0UsbuamSD0lbZZ9T4Wun6JKRU9HxnHBp0xNMmeeTICo5wGJW2cpHFVCdQxRkzdb/8SRPzz40uOHzaIkz7vgwWs2nUbj5oOms0Zpq%2Bxp2Yhu3KIRECroPBDfJmThmYrPn0wpF4J9S/%2BQqfhYVA7ae0D/n32i1FmROkx1yXrPtzRVjMmxdNRhKo/eOM81vwGwq40fC1zGDun6h%2BD///DI9X0yR0A7aKTdFupmSLfBf3GGfjlm6NdnR7YWFdf2gBer00uHGiRDJpd63THZvhsG3fgI1VddsXM1DiDMr8LB1KizGtB3JVMmTaw2YCC6zxhASHirSJ0yLb2qytV%2BQ6MHWyXUscnoGbBU%2Bwj5lVJ2PoXroGz0hLMbRvoqFIl1yyxPft4zxTfO6eLyE1ONnZjXSxOvXdIQjkKoRtLfZNL9JJiZtD4NWtN59akeFS1Dp/cPb6HnREn40R9DlIFRjX33oOmRyzJSvQ1Hhu7xxki95dfJujBnDJzdzD0yPk9BxvKOQ5EHS4vT6HNQJAn%2Bg6VBEL/gVhd4S30AUQCCKspOGfKyAqakZ/vfSFi/lUUqncWP6n2b5OvN316Xep0cL/Vvo%3D Time taken: 0.179 seconds, Fetched: 10 row(s) hive> |
Python-RSA加密解密程序
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# coding: utf-8 import rsa import base64 from urllib import request PUBLIC_FILE_PATH = "/tmp/public8.pem" PRIVATE_FILE_PATH = "/tmp/private1.pem" def create_new_keys(lens): (public_key, private_key) = rsa.newkeys(lens) with open('/tmp/public1.pem', 'wb') as f: f.write(public_key.save_pkcs1()) with open('/tmp/private1.pem', 'wb') as f: f.write(private_key.save_pkcs1()) def rsa_encrypt(msg): with open(PUBLIC_FILE_PATH, 'rb') as public_file: public_key = rsa.PublicKey.load_pkcs1_openssl_pem(public_file.read()) code = rsa.encrypt(msg.encode('utf-8'), public_key) code = base64.b64encode(code).decode('utf-8') code = request.quote(code) return code def rsa_decrypt(code): code = request.unquote(code) with open(PRIVATE_FILE_PATH, 'rb') as private_file: private_key = rsa.PrivateKey.load_pkcs1(private_file.read()) code = base64.b64decode(code.encode('utf-8')) msg = rsa.decrypt(code, private_key).decode('utf-8') return msg if __name__ == '__main__': # first:创建pkcs1格式公钥私钥 # create new keys: pkcs1[private, public] # create_new_keys(4096) # second:pkcs1私钥转pkcs8私钥 # convert pkcs1 private to pkcs8 private # openssl pkcs8 -topk8 -inform PEM -in private1.pem -outform pem -nocrypt -out private8.pem # third:pkcs8私钥转pkcs8公钥 # convert pkcs8 private to pkcs8 public # openssl rsa -in private8.pem -pubout -out public8.pem # fourth:利用pkcs1私钥和pkcs8公钥加密解密数据 oldstr = "cppla" print(oldstr) encryptstr = rsa_encrypt(msg=oldstr) print(encryptstr) decryptstr = rsa_decrypt(encryptstr) print(decryptstr) |
按照Python-RSA程序步骤创建pkcs1私钥和pkcs8公钥
1 2 3 4 5 |
# 一、Python生成密钥 # /tmp/public1.pem # /tmp/private1.pem # 二、OpenSSL转换对应格式密钥 # /tmp/public8.pem |
Python-UDF: python_rsa_udf.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# coding: utf-8 import sys reload(sys) sys.setdefaultencoding("utf8") import rsa import base64 from urllib import quote,unquote PUBLIC_FILE_PATH = "/tmp/public8.pem" PRIVATE_FILE_PATH = "/tmp/private1.pem" def create_new_keys(lens): (public_key, private_key) = rsa.newkeys(lens) with open('/tmp/public1.pem', 'wb') as f: f.write(public_key.save_pkcs1()) with open('/tmp/private1.pem', 'wb') as f: f.write(private_key.save_pkcs1()) def rsa_encrypt(msg): with open(PUBLIC_FILE_PATH, 'rb') as public_file: public_key = rsa.PublicKey.load_pkcs1_openssl_pem(public_file.read()) code = rsa.encrypt(msg.encode('utf-8'), public_key) code = base64.b64encode(code).decode('utf-8') code = quote(code) return code def rsa_decrypt(code): code = unquote(code) with open(PRIVATE_FILE_PATH, 'rb') as private_file: private_key = rsa.PrivateKey.load_pkcs1(private_file.read()) code = base64.b64decode(code.encode('utf-8')) msg = rsa.decrypt(code, private_key).decode('utf-8') return msg def test(stream): for line in stream: id,name,phone = line.strip().split("\t") name_encrypt = rsa_encrypt(name.decode()) name_decrypt = rsa_decrypt(name_encrypt.decode()) phone_decrypt = rsa_decrypt(phone.decode()) print("\t".join([id, name, phone, name_encrypt, name_decrypt, phone_decrypt])) if __name__ == '__main__': test(sys.stdin) |
HIVE-SQL:python_rsa_udf.sql
1 2 3 4 5 6 7 |
ADD FILE /opt/python_rsa_udf.py; SELECT TRANSFORM (c.id, c.name, c.phone) USING 'python python_rsa_udf.py' AS (id int, name string, phone string, name_encrypt string, name_decrypt string, phone_decrypt string) FROM test.cppla c where c.id in (8,9); |
运行结果
hive -f python_rsa_udf.sql
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
Query ID = root_20191106092916_cd84fdc7-3cfb-4a78-bbcc-b07eca4c64ad Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks is set to 0 since there's no reduce operator Job running in-process (local Hadoop) 2019-11-06 09:29:21,015 Stage-1 map = 0%, reduce = 0% 2019-11-06 09:29:22,028 Stage-1 map = 100%, reduce = 0% Ended Job = job_local22698763_0001 MapReduce Jobs Launched: Stage-Stage-1: HDFS Read: 2204 HDFS Write: 0 SUCCESS Total MapReduce CPU Time Spent: 0 msec OK 8 whoami AavnaU/kM0niJgrqPSGG2HAAA54FAYtd9s4%2BeJrRPYSEvsdLBKRaSMGy4HjN4t9Bm7UV/qLNnSlP2hmPxL5nTzfxnYcd1t0rJHopanPCnyC7jViAYXHyLRUNsoNRnh1wBxZCSC%2B7GA3xc46q2ecjYO0mqf3/ew3v2c/zpmYneC9ruKD5aCSXirWkVH2ggM4qKMBY7K92Gii4GhTStgljjX%2Be6KIQrgxdAdbGYkJob%2BVYzyEENmDkcd970/cpO5S7%2BXW9ud3bqV6Im6A5bWjELec6H8BNcVo4ACTWy28kfRS/Rf2s2FOqpN70cjs2T/Wi2f8p6deI8d8se%2BWoKoKk1%2Bfn5aJ1CXE%2BU9PrLqUH/beLWMfcxwEeaNprLhWt0cgir7BQG4xLf%2BJrINrEcx44EnK9SjWCg0gZlW9XFQx9h/5ZwuaXtUx1Ie6570XBJ8blnrW4Fm2eZNSZQAI4Nkcpi2PG2x3r6iRChL9O9PDmPtjmvl/CwLH1RkHe/03jX5dSE0siIJV8UgPjYt8G0aW0vxfYGtyqRcLrt7pHRqAc4ouo/uRu4A68gbcSntkrX27hETfXz5WAgYMIgQ1Ph70Djx48MT0LmL7nbknCH3kLdCdjSBPfViEPGO5haDxWkm0uNt2IwgDgHhtnmMsHhEhCV8vTTZzGmJXNKEtxzIl4ZkU%3D dBCPY0ZOKbwFgUMj4QpYW9pt1k0p03vdRASj1p0KoNuzqBQEJfYA3kfS95%2BjGV4T93ql/9wBoSDE8CqN6VHJZnKWt7BzzsISe2eVnfVFzXqnH9WF9mdtpfGSHBlvUTVDOF6ySSrvCNdebj6g5%2BhxOpX25LQuZjajlycLoot5Nv9AOyL7U/zaEg9XDuclNBcBBCPTSlEFO3e%2BfQlAxAqsCr4vLwVFnD9woTOhkb94FVP9OzGgh21Gl1pHW7zCPzFtbr3Fd3n2/Pr4xnnDc5aTzc9tu21IDgZ6%2BZ2jhE8A0/NgECyoIRxnjU0BhVr1PiYcCzRg9Ko1YOLs5dFxk2YX3zWaUEzoPht2OkkbxPzxElSLN6UMtmCpL6o4FVaRk8lPGaQPHC6WD5KhWe4NXebFQirPn97Ar266NWLm/nJ55ZfMubX1KoQ2OZn/FpjhTN3ZJ91c2tOPNXwKhG5t/NpkAIp7R286UjQR98Mk6yuUjpOV6n0nBxhf6pCbCMq/rMshnTKrOv3CUd5zXCUsz6bVR7938Xkg5l3b6lhAvu4cH3IXCZFRjbfmBXmogoujCMNGHhQwBwro74wly9a6FNcnAP2hEuBIBRgADyf8AWVA2sTishvhZetq0Lxn6j7Gxmpvbo3r9oZPR/byl6sP0lmiXn28g5rmBCVngYN%2BvqpJBrU%3D whoami whoami 9 cppla e6uYTyEI2pdR%2BkBAfFoFQMlGiBRWb/wHlVUYLiPBDDdnD5Er%2B5NSJacW7w%2BrEcW/qdcNiwuoHNihBJD7Db9XCEy0UsbuamSD0lbZZ9T4Wun6JKRU9HxnHBp0xNMmeeTICo5wGJW2cpHFVCdQxRkzdb/8SRPzz40uOHzaIkz7vgwWs2nUbj5oOms0Zpq%2Bxp2Yhu3KIRECroPBDfJmThmYrPn0wpF4J9S/%2BQqfhYVA7ae0D/n32i1FmROkx1yXrPtzRVjMmxdNRhKo/eOM81vwGwq40fC1zGDun6h%2BD///DI9X0yR0A7aKTdFupmSLfBf3GGfjlm6NdnR7YWFdf2gBer00uHGiRDJpd63THZvhsG3fgI1VddsXM1DiDMr8LB1KizGtB3JVMmTaw2YCC6zxhASHirSJ0yLb2qytV%2BQ6MHWyXUscnoGbBU%2Bwj5lVJ2PoXroGz0hLMbRvoqFIl1yyxPft4zxTfO6eLyE1ONnZjXSxOvXdIQjkKoRtLfZNL9JJiZtD4NWtN59akeFS1Dp/cPb6HnREn40R9DlIFRjX33oOmRyzJSvQ1Hhu7xxki95dfJujBnDJzdzD0yPk9BxvKOQ5EHS4vT6HNQJAn%2Bg6VBEL/gVhd4S30AUQCCKspOGfKyAqakZ/vfSFi/lUUqncWP6n2b5OvN316Xep0cL/Vvo%3D fCyK23%2BqvDDYM4s20AcnmtcPQGr6O4MvJxpZcNWfLggLP3hX2Ag1I6P5lmL/pseypw05SPvRi0kZQcypUoEf0gZ%2Bk0nrAr9J%2BvrijS9gnOPt2Ztqy1WoztRbSiPQdpyZSd5gcPOMsS6sIF3AU8z0mdbGaCrmW9N3u2YsZdyz3GoFCH1dOkMuSRy4nxZt9odex2b0Ccw/%2BJLgQciEDeK2qbem3W3ll3fljUII/NZxVZwOYU3MndmqI4L4dCCvyV1SWhqPXChGzoqbESafE0himCQmIZ1tsY65rqxry8wwsBmNaw0WlTq715pTDUwISzFuntsz4RWrYY8Sky28XD9iob1hONNTMFNlYumWV610uGddEBhI7H3MhOnz3BSZgWYhYTAiM/ob9zEtS1Om%2BkXQOtYZXf%2BQJJUvXR/BFbSDa692d0sNpB5BDc3cu/oO1Lh9r4vS3P9KzAZZ4KB7NRRhqN7TeF2t54WsfC1qWuxJLlZjm2fJ9niTEtIWgKJ4iuXWJwwCBZcXe7TsZJhjEjwPS9fgsv11M%2B9XZ3rl0rRTMlJaBTLdVApBuNIuOoCSJTFy9LgFMDR1gtdIMwh2gfC%2Bnstu9w9JTFUGSPF3v4/b762H/sLbOMfhObHDxdvNAR84IbgwGy8RrvzThZb4/C43glEdENs6tIjVbtfQ07rRjwQ%3D cppla cppla Time taken: 5.259 seconds, Fetched: 2 row(s) |