JAVA开发(记一次删除完全相同pgSQL数据库记录只保留一条)

 

进行数据管理时,无效数据可能会对生产力和决策质量造成严重的影响。如何发现和处理无效数据变得愈发重要。一起来唠唠你会如何处理无效数据吧~

方向一:介绍无效数据的概念

最近遇到了 PostgreSQL 数据库表中出现大量重复数据的情况,需要对每组重复记录只保留一条、删除其余的记录。逐条手工删除显然不切实际,需要通过 SQL 批量处理。

方向二:无效数据的处理方法

 实施步骤:

1.对原表进行备份

2.创建一张表结构完全相同的临时表,把原始表(备份表)的数据复制进去。

-- Working copy: clone every row and column of the backup table so that the
-- analysis steps below never touch the backup itself.
CREATE TABLE tmp_0524_1 AS
TABLE public.m_user_bak20230524;

3.找出重复的数据:

-- List every non-null phone that occurs more than once in the working copy,
-- together with its occurrence count (cn).
-- IF EXISTS keeps the script runnable on a first run, when the table has not
-- been created yet; a bare DROP TABLE would abort with an error.
DROP TABLE IF EXISTS tmp_0524_2;
CREATE TABLE tmp_0524_2 AS
SELECT
    phone,
    COUNT(*) AS cn
FROM tmp_0524_1
WHERE phone IS NOT NULL
GROUP BY phone
HAVING COUNT(*) > 1;

4.将重复手机号对应的完整记录(仅 opt_user 为空的)存入新的临时表:

-- Collect the full rows behind each duplicated phone, restricted to rows whose
-- opt_user is NULL.
-- IF EXISTS keeps the script runnable on a first run, when the table has not
-- been created yet.
DROP TABLE IF EXISTS tmp_0524_3_1;
CREATE TABLE tmp_0524_3_1 AS
SELECT t.*
FROM tmp_0524_1 AS t
WHERE t.opt_user IS NULL
  -- The inner column is qualified so it can never silently bind to the outer row.
  AND t.phone IN (SELECT d.phone FROM tmp_0524_2 AS d);

5.建一个表结构一样的空表,多增加一个iid字段

-- Empty staging table: same columns as tmp_0524_3_1 plus a leading numeric
-- iid column used later to tell otherwise identical rows apart.
-- Fixes: the source table is now aliased as t (the original referenced t.*
-- without declaring the alias, which is an error), and iid is bigint rather
-- than a text column produced by the '' literal, so comparisons on iid are
-- numeric instead of lexicographic.
DROP TABLE IF EXISTS tmp_0524_3;
CREATE TABLE tmp_0524_3 AS
SELECT
    CAST(NULL AS bigint) AS iid,
    t.*
FROM tmp_0524_3_1 AS t
WHERE 1 = 2;  -- always false: copy the structure only, no rows

6.创建一个序列,并将其设为 iid 字段的默认值,使 iid 自增

-- Sequence that numbers the staged rows; it becomes the default of
-- tmp_0524_3.iid so every inserted row gets a distinct value.
-- IF NOT EXISTS makes the step re-runnable, and OWNED BY ties the sequence to
-- the column so it is dropped together with the staging table instead of
-- being left behind.
CREATE SEQUENCE IF NOT EXISTS tmp_0524_3_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1
    OWNED BY tmp_0524_3.iid;

ALTER TABLE tmp_0524_3
    ALTER COLUMN iid SET DEFAULT nextval('tmp_0524_3_id_seq');

7.插入数据:

-- Load the duplicate candidates into the staging table. iid is deliberately
-- left out of the column list so that its column default fills it in.
INSERT INTO public.tmp_0524_3 (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
FROM tmp_0524_3_1;

8.每个 phone 只保留 iid 最大的一条,删除其余重复记录:

-- Keep one row per phone in the staging table: a row is deleted when another
-- row with the same phone carries a larger iid, so only the row with the
-- largest iid of each phone survives.
-- A correlated EXISTS replaces NOT IN (subquery): NOT IN matches nothing at
-- all as soon as the subquery yields a single NULL, whereas EXISTS has no
-- such trap.
DELETE FROM tmp_0524_3 AS dup
WHERE EXISTS (
    SELECT 1
    FROM tmp_0524_3 AS keeper
    WHERE keeper.phone = dup.phone
      AND keeper.iid > dup.iid
);

9.删除原始表中存在重复的那些记录(按 phone 匹配):

-- Steps 9 and 10 run as ONE transaction: if the re-insert fails, the delete is
-- rolled back too, so the live table never ends up missing the rows.
BEGIN;

-- Remove the duplicated rows from the live table. Only rows with opt_user
-- IS NULL are removed, because only such rows were staged; rows sharing a
-- phone but carrying an opt_user value were never staged and would otherwise
-- be deleted without being restored.
-- NOTE(review): confirm that rows with a non-null opt_user are meant to be kept.
DELETE FROM public.m_user
WHERE opt_user IS NULL
  AND phone IN (SELECT s.phone FROM tmp_0524_3 AS s);

10.将处理好的数据插回原始表:

-- Put back the single surviving row of each phone. The staging-only iid
-- column is not part of the column list.
INSERT INTO public.m_user (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
FROM tmp_0524_3;

COMMIT;
 

方向三:如何减少无效数据

在并发系统中,插入数据时应加锁,或者为 phone 等业务键建立唯一约束,从根源上避免重复插入数据。

方向四:实际案例举例

案例:

最近遇到了 PostgreSQL 数据库表中出现大量重复数据的情况,需要对每组重复记录只保留一条、删除其余的记录。逐条手工删除显然不切实际,需要通过 SQL 批量处理。

实施步骤:

1.对原表进行备份

2.创建一张表结构完全相同的临时表,把原始表(备份表)的数据复制进去。

-- Working copy: clone every row and column of the backup table so that the
-- analysis steps below never touch the backup itself.
CREATE TABLE tmp_0524_1 AS
TABLE public.m_user_bak20230524;

3.找出重复的数据:

-- List every non-null phone that occurs more than once in the working copy,
-- together with its occurrence count (cn).
-- IF EXISTS keeps the script runnable on a first run, when the table has not
-- been created yet; a bare DROP TABLE would abort with an error.
DROP TABLE IF EXISTS tmp_0524_2;
CREATE TABLE tmp_0524_2 AS
SELECT
    phone,
    COUNT(*) AS cn
FROM tmp_0524_1
WHERE phone IS NOT NULL
GROUP BY phone
HAVING COUNT(*) > 1;

4.将重复手机号对应的完整记录(仅 opt_user 为空的)存入新的临时表:

-- Collect the full rows behind each duplicated phone, restricted to rows whose
-- opt_user is NULL.
-- IF EXISTS keeps the script runnable on a first run, when the table has not
-- been created yet.
DROP TABLE IF EXISTS tmp_0524_3_1;
CREATE TABLE tmp_0524_3_1 AS
SELECT t.*
FROM tmp_0524_1 AS t
WHERE t.opt_user IS NULL
  -- The inner column is qualified so it can never silently bind to the outer row.
  AND t.phone IN (SELECT d.phone FROM tmp_0524_2 AS d);

5.建一个表结构一样的空表,多增加一个iid字段

-- Empty staging table: same columns as tmp_0524_3_1 plus a leading numeric
-- iid column used later to tell otherwise identical rows apart.
-- Fixes: the source table is now aliased as t (the original referenced t.*
-- without declaring the alias, which is an error), and iid is bigint rather
-- than a text column produced by the '' literal, so comparisons on iid are
-- numeric instead of lexicographic.
DROP TABLE IF EXISTS tmp_0524_3;
CREATE TABLE tmp_0524_3 AS
SELECT
    CAST(NULL AS bigint) AS iid,
    t.*
FROM tmp_0524_3_1 AS t
WHERE 1 = 2;  -- always false: copy the structure only, no rows

6.创建一个序列,并将其设为 iid 字段的默认值,使 iid 自增

-- Sequence that numbers the staged rows; it becomes the default of
-- tmp_0524_3.iid so every inserted row gets a distinct value.
-- IF NOT EXISTS makes the step re-runnable, and OWNED BY ties the sequence to
-- the column so it is dropped together with the staging table instead of
-- being left behind.
CREATE SEQUENCE IF NOT EXISTS tmp_0524_3_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1
    OWNED BY tmp_0524_3.iid;

ALTER TABLE tmp_0524_3
    ALTER COLUMN iid SET DEFAULT nextval('tmp_0524_3_id_seq');

7.插入数据:

-- Load the duplicate candidates into the staging table. iid is deliberately
-- left out of the column list so that its column default fills it in.
INSERT INTO public.tmp_0524_3 (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
FROM tmp_0524_3_1;

8.每个 phone 只保留 iid 最大的一条,删除其余重复记录:

-- Keep one row per phone in the staging table: a row is deleted when another
-- row with the same phone carries a larger iid, so only the row with the
-- largest iid of each phone survives.
-- A correlated EXISTS replaces NOT IN (subquery): NOT IN matches nothing at
-- all as soon as the subquery yields a single NULL, whereas EXISTS has no
-- such trap.
DELETE FROM tmp_0524_3 AS dup
WHERE EXISTS (
    SELECT 1
    FROM tmp_0524_3 AS keeper
    WHERE keeper.phone = dup.phone
      AND keeper.iid > dup.iid
);

9.删除原始表中存在重复的那些记录(按 phone 匹配):

-- Steps 9 and 10 run as ONE transaction: if the re-insert fails, the delete is
-- rolled back too, so the live table never ends up missing the rows.
BEGIN;

-- Remove the duplicated rows from the live table. Only rows with opt_user
-- IS NULL are removed, because only such rows were staged; rows sharing a
-- phone but carrying an opt_user value were never staged and would otherwise
-- be deleted without being restored.
-- NOTE(review): confirm that rows with a non-null opt_user are meant to be kept.
DELETE FROM public.m_user
WHERE opt_user IS NULL
  AND phone IN (SELECT s.phone FROM tmp_0524_3 AS s);

10.将处理好的数据插回原始表:

-- Put back the single surviving row of each phone. The staging-only iid
-- column is not part of the column list.
INSERT INTO public.m_user (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
FROM tmp_0524_3;

COMMIT;

 

猜你喜欢

转载自blog.csdn.net/dongjing991/article/details/130855503