网约车大数据综合项目——数据分析Hive
第1关:Hive 储存数据
开启Hadoop服务,并使得 Hive 连接 MySQL 初始化
start-all.sh schematool -dbType mysql -initSchema
进入hive命令行
hive
创建数据库并连接数据库
create database trafficdata; use trafficdata;
创建表并从本地导入数据
create table cancelorder(companyid string,address string,districtname string,orderid string,ordertime string,canceltime string,operator string,canceltypecode string,cancelreason string) row format delimited fields terminated by '|'; load data local inpath '/data/workspace/myshixun/data/canceldata.txt' into table cancelorder; create table createorder(companyid string,address string,districtname string,orderid string,departtime string,ordertime string,departure string,deplongitude string,deplatitude string,destination string,destlongitude string,destlatitude string) row format delimited fields terminated by '\t'; load data local inpath '/data/workspace/myshixun/data/createdata.txt' into table createorder;
第2关:统计撤销订单中撤销理由最多的前 10 种理由
开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)
start-all.sh schematool -dbType mysql -initSchema
进入hive命令行
hive
连接数据库
use trafficdata;
创建表
create table cancelreason(cancelreason string,num int) row format delimited fields terminated by '\t';
查询数据并插入表
insert into cancelreason select cancelreason,count(*) num from cancelorder where cancelreason != '未知' group by cancelreason order by num desc limit 10;
上传表
export table cancelreason to'/user/hadoop/cancelreason';
另开命令行进入MySQL
mysql -h127.0.0.1 -uroot -p123123
创建数据库并连接数据库
create database trafficdata; use trafficdata;
创建表
create table cancelreason(
cancelreason varchar(255),
num int not null
);
退出MySQL
exit;
下载数据到MySQL的表
sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/cancelreason/data/000000_0' --table cancelreason --fields-terminated-by '\t';
第3关:查询出成功订单最多的 10 个行政区名
开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)
start-all.sh schematool -dbType mysql -initSchema
进入hive命令行
hive
连接数据库
use trafficdata;
创建表
create table order_district(district string,num int) row format delimited fields terminated by '\t';
查询数据并插入表
insert into order_district select districtname,count(*) num from createorder group by districtname order by num desc limit 10;
上传表
export table order_district to'/user/hadoop/order_district';
另开命令行进入MySQL
mysql -h127.0.0.1 -uroot -p123123
连接数据库(无数据库需要新建数据库)
use trafficdata;
创建表
create table order_district(
cancelreason varchar(255),
num int not null
);
退出MySQL
exit;
下载数据到MySQL的表
sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/order_district/data/000000_0' --table order_district --fields-terminated-by '\t';
第4关:查询湖南省各个市的所有订单总量
开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)
start-all.sh schematool -dbType mysql -initSchema
进入hive命令行
hive
连接数据库
use trafficdata;
创建表
create table orderbycity(city varchar(255),num int) row format delimited fields terminated by '\t';
查询数据并插入表
INSERT INTO orderbycity (city, num) SELECT '湖南省长沙市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省长沙市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省长沙市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省株洲市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省株洲市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省株洲市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省湘潭市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省湘潭市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省湘潭市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省衡阳市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省衡阳市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省衡阳市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省邵阳市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省邵阳市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省邵阳市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省岳阳市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省岳阳市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省岳阳市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省常德市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省常德市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省常德市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省张家界市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省张家界市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省张家界市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省益阳市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省益阳市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省益阳市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省娄底市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省娄底市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省娄底市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省郴州市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省郴州市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省郴州市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省永州市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省永州市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省永州市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省怀化市' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省怀化市%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省怀化市%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result; INSERT INTO orderbycity (city, num) SELECT '湖南省湘西土家族苗族自治州' AS districtname, SUM(total_num) AS total_num FROM (SELECT districtname, SUM(num) AS total_num FROM (SELECT districtname, COUNT(*) AS num FROM cancelorder WHERE districtname LIKE '湖南省湘西土家族苗族自治州%' GROUP BY districtname UNION ALL SELECT districtname, COUNT(*) AS num FROM createorder WHERE districtname LIKE '湖南省湘西土家族苗族自治州%' GROUP BY districtname) AS combined GROUP BY districtname) AS final_result;
上传表
export table orderbycity to'/user/hadoop/orderbycity';
另开命令行进入MySQL
mysql -h127.0.0.1 -uroot -p123123
连接数据库(无数据库需要新建数据库)
use trafficdata;
创建表
create table orderbycity(
city varchar(255),
num int not null
);
退出MySQL
exit;
下载数据到MySQL的表
sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_1' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_2' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_3' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_4' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_5' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_6' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_7' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_8' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_9' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_10' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_11' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_12' --table orderbycity --fields-terminated-by '\t'; sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/orderbycity/data/000000_0_copy_13' --table orderbycity --fields-terminated-by '\t';
第5关:统计湖南省当天的每分钟订单总数量
开启Hadoop服务,并使得 Hive 连接 MySQL 初始化(已开启无需重复启动)
start-all.sh schematool -dbType mysql -initSchema
进入hive命令行
hive
连接数据库
use trafficdata;
创建表
create table order_province_time(`time` string,num int) row format delimited fields terminated by '\t';
查询数据并插入表
insert into order_province_time select datetime, sum(num) as total_num from (select date_format(ordertime, 'yyyy-MM-dd HH:mm') as datetime, count(ordertime) as num from cancelorder where districtname like '湖南省%' group by date_format(ordertime, 'yyyy-MM-dd HH:mm') union all select date_format(departtime, 'yyyy-MM-dd HH:mm') as datetime, count(ordertime) as num from createorder where districtname like '湖南省%' group by date_format(departtime, 'yyyy-MM-dd HH:mm')) as combined group by datetime order by datetime;
上传表
export table order_province_time to'/user/hadoop/order_province_time';
另开命令行进入MySQL
mysql -h127.0.0.1 -uroot -p123123
连接数据库(无数据库需要新建数据库)
use trafficdata;
创建表
create table order_province_time(
times varchar(255),
num int not null
);
退出MySQL
exit;
下载数据到MySQL的表
sqoop export --connect jdbc:mysql://127.0.0.1:3306/trafficdata --username root --password 123123 --export-dir '/user/hadoop/order_province_time/data/000000_0' --table order_province_time --fields-terminated-by '\t';
(图片来源网络,侵删)
免责声明:我们致力于保护作者版权,注重分享,被刊用文章因无法核实真实出处,未能及时与作者取得联系,或有版权异议的,请联系管理员,我们会立即处理! 部分文章是来自自研大数据AI进行生成,内容摘自(百度百科,百度知道,头条百科,中国民法典,刑法,牛津词典,新华词典,汉语词典,国家院校,科普平台)等数据,内容仅供学习参考,不准确地方联系删除处理! 图片声明:本站部分配图来自人工智能系统AI生成,觅知网授权图片,PxHere摄影无版权图库和百度,360,搜狗等多加搜索引擎自动关键词搜索配图,如有侵权的图片,请第一时间联系我们,邮箱:ciyunidc@ciyunshuju.com。本站只作为美观性配图使用,无任何非法侵犯第三方意图,一切解释权归图片著作权方,本站不承担任何责任。如有恶意碰瓷者,必当奉陪到底严惩不贷!
