选读SQL经典实例笔记21_字符串处理

1. SQL 并不专门用于处理复杂的字符串

1.1. 需要有逐字遍历字符串的能力。但是,使用SQL 进行这样的操作并不容易

1.2. SQL 没有Loop循环功能

1.2.1. Oracle的MODEL子句除外

2. 遍历字符串

2.1. 把EMP表的ENAME等于KING的字符串拆开来显示为4行,每行一个字符

2.2. sql

select substr(e.ename,iter.pos,1) as C

  from (select ename from emp where ename = 'KING') e,
       (select id as pos from t10) iter
where iter.pos <= length(e.ename)
C
-
K
I
N
G

2.3. T10表,该表有10行记录(它只有一列,列名为ID,它的值分别是从1到10

3. 嵌入引号

3.1. sql

QMARKS
--------------
g'day mate
beavers' teeth
'

3.2. sql

select 'g''day mate' qmarks from t1 union all
select 'beavers'' teeth'    from t1 union all
select ''''                 from t1

4. 统计字符出现的次数

4.1. 10,CLARK,MANAGER

4.1.1. 该字符串里有多少个逗号

4.2. sql

 select (length('10,CLARK,MANAGER')-
       length(replace('10,CLARK,MANAGER',',','')))/length(',')
       as cnt
  from t1

4.3. 获取不含逗号的字符串长度

4.4. 逗号的删除则借助了REPLACE函数

5. 删除不想要的字符

5.1. sql

ENAME             SAL
---------- ----------
SMITH             800
ALLEN            1600
WARD             1250
JONES            2975
MARTIN           1250
BLAKE            2850
CLARK            2450
SCOTT            3000
KING             5000
TURNER           1500
ADAMS            1100
JAMES             950
FORD             3000
MILLER           1300

5.2. sql

ENAME      STRIPPED1         SAL STRIPPED2
--------- ---------- ---------- ---------
SMITH      SMTH              800 8
ALLEN      LLN              1600 16
WARD       WRD              1250 125
JONES      JNS              2975 2975
MARTIN     MRTN             1250 125
BLAKE      BLK              2850 285
CLARK      CLRK             2450 245
SCOTT      SCTT             3000 3
KING       KNG              5000 5
TURNER     TRNR             1500 15
ADAMS      DMS              1100 11
JAMES      JMS               950 95
FORD       FRD              3000 3
MILLER     MLLR             1300 13

5.3. DB2

5.3.1. sql

select ename,
       replace(translate(ename,'aaaaa','AEIOU'),'a','') stripped1,
       sal,
       replace(cast(sal as char(4)),'0','') stripped2
  from emp

5.4. Oracle

5.5. PostgreSQL

5.6. 使用内置函数TRANSLATE和REPLACE删除不想要的字符和字符串

5.6.1. sql

select ename,
        replace(translate(ename,'AEIOU','aaaaa'),'a')
        as stripped1,
        sal,
        replace(sal,0,'') as stripped2
   from emp

5.7. MySQL

5.8. SQL Server

5.9. 多次调用REPLACE 函数

5.9.1. sql

select ename,
        replace(
        replace(
        replace(
        replace(
        replace(ename,'A',''),'E',''),'I',''),'O',''),'U','')
        as stripped1,
        sal,
        replace(sal,0,'') stripped2
   from emp

6. 分离数字和字符数据

6.1. sql

DATA
---------------
SMITH800
ALLEN1600
WARD1250
JONES2975
MARTIN1250
BLAKE2850
CLARK2450
SCOTT3000
KING5000
TURNER1500
ADAMS1100
JAMES950
FORD3000
MILLER1300

6.2. DB2

6.2.1. sql

select replace(
    translate(data,'0000000000','0123456789'),'0','') ename,
          cast(
       replace(
     translate(lower(data),repeat('z',26),
           'abcdefghijklmnopqrstuvwxyz'),'z','') as integer) sal
    from (
  select ename||cast(sal as char(4)) data
    from emp
           ) x

6.3. Oracle

6.3.1. sql

select replace(
      translate(data,'0123456789','0000000000'),'0') ename,
      to_number(
        replace(
        translate(lower(data),
                  'abcdefghijklmnopqrstuvwxyz',
                   rpad('z',26,'z')),'z')) sal
   from (
 select ename||sal data
   from emp
        )

6.4. PostgreSQL

6.4.1. sql

select replace(
      translate(data,'0123456789','0000000000'),'0','') as ename,
           cast(
        replace(
      translate(lower(data),
                'abcdefghijklmnopqrstuvwxyz',
                rpad('z',26,'z')),'z','') as integer) as sal
   from (
 select ename||sal as data
   from emp
        ) x

7. 按照子字符串排序

7.1. sql

ENAME
----------
SMITH
ALLEN
WARD
JONES
MARTIN
BLAKE
CLARK
SCOTT
KING
TURNER
ADAMS
JAMES
FORD
MILLER

7.2. DB2

7.3. Oracle

7.4. PostgreSQL

7.5. MySQL

7.6. 使用内置函数LENGTH和SUBSTR

7.6.1. sql

select ename
   from emp
  order by substr(ename,length(ename)-1,2)

7.7. SQL Server

7.7.1. sql

select ename
  from emp
 order by substring(ename,len(ename)-1,2)

8. 根据字符串里的数字排序

8.1. sql

DATA
-----------------------------
CLARK   7782 ACCOUNTING
KING    7839 ACCOUNTING
MILLER  7934 ACCOUNTING
SMITH   7369 RESEARCH
JONES   7566 RESEARCH
SCOTT   7788 RESEARCH
ADAMS   7876 RESEARCH
FORD    7902 RESEARCH
ALLEN   7499 SALES
WARD    7521 SALES
MARTIN  7654 SALES
BLAKE   7698 SALES
TURNER  7844 SALES
JAMES   7900 SALES

8.2. DB2

8.2.1. sql

select data
   from V
  order by
         cast(
      replace(
    translate(data,repeat('#',length(data)),
      replace(
    translate(data,'##########','0123456789'),
             '#','')),'#','') as integer)

8.3. Oracle

8.3.1. sql

select data
   from V
  order by
         to_number(
           replace(
         translate(data,
           replace(
         translate(data,'0123456789','##########'),
                  '#'),rpad('#',20,'#')),'#'))

8.4. PostgreSQL

8.4.1. sql

select data
   from V
  order by
         cast(
      replace(
    translate(data,
      replace(
    translate(data,'0123456789','##########'),
             '#',''),rpad('#',20,'#')),'#','') as integer)

9. 识别字符串里的数字字符

9.1. sql

MIXED
--------------
CL10AR
KI10NG
MI10LL
7369
7566
7788
7876
7902
ALLEN
WARD
MARTIN
BLAKE
TURNER
JAMES

9.2. DB2

9.2.1. sql

select mixed old,
        cast(
          case
          when
             replace(
           translate(mixed,'9999999999','0123456789'),'9','') = ''
          then
             mixed
          else replace(
             translate(mixed,
                repeat('#',length(mixed)),
              replace(
               translate(mixed,'9999999999','0123456789'),'9','')),
                       '#','')
           end as integer ) mixed
   from V
  where posstr(translate(mixed,'9999999999','0123456789'),'9') > 0

9.3. Oracle

9.3.1. sql

select to_number (
        case
        when
           replace(translate(mixed,'0123456789','9999999999'),'9')
          is not null
        then
             replace(
           translate(mixed,
             replace(
          translate(mixed,'0123456789','9999999999'),'9'),
                    rpad('#',length(mixed),'#')),'#')
        else
              mixed
        end
        ) mixed
  from V
 where instr(translate(mixed,'0123456789','9999999999'),'9') > 0

9.4. PostgreSQL

9.4.1. sql

select cast(
        case
        when
         replace(translate(mixed,'0123456789','9999999999'),'9','')
         is not null
        then
           replace(
          translate(mixed,
            replace(
          translate(mixed,'0123456789','9999999999'),'9',''),
                    rpad('#',length(mixed),'#')),'#','')
        else
          mixed
        end as integer ) as mixed
    from V
  where strpos(translate(mixed,'0123456789','9999999999'),'9') > 0

9.5. MySQL

9.5.1. sql

select cast(group_concat(c order by pos separator '') as unsigned)
        as MIXED1
   from (
 select v.mixed, iter.pos, substr(v.mixed,iter.pos,1) as c
   from V,
        ( select id pos from t10 ) iter
  where iter.pos <= length(v.mixed)
    and ascii(substr(v.mixed,iter.pos,1)) between 48 and 57
        ) y
  group by mixed
  order by 1

10. 提取第n个分隔子字符串

10.1. sql

create view V as
select 'mo,larry,curly' as name
  from t1
 union all
select 'tina,gina,jaunita,regina,leena' as name
  from t1

10.2. sql

SUB
-----
larry
 gina

10.3. DB2

10.3.1. sql

select substr(c,2,locate(',',c,2)-2)
  from (
select pos, name, substr(name, pos) c,
       row_number() over(partition by name
                       order by length(substr(name,pos)) desc) rn
  from (
select ',' ||csv.name|| ',' as name,
       cast(iter.pos as integer) as pos
  from V csv,
       (select row_number() over() pos from t100 ) iter
 where iter.pos <= length(csv.name)+2
       )  x
 where length(substr(name,pos)) > 1
   and substr(substr(name,pos),1,1) = ','
       ) y
 where rn = 2

10.4. Oracle

10.4.1. sql

select sub
   from (
 select iter.pos,
        src.name,
        substr( src.name,
         instr( src.name,',',1,iter.pos )+1,
         instr( src.name,',',1,iter.pos+1 ) -
         instr( src.name,',',1,iter.pos )-1) sub
   from (select ','||name||',' as name from V) src,
        (select rownum pos from emp) iter
  where iter.pos < length(src.name)-length(replace(src.name,','))
        )
  where pos = 2

10.5. PostgreSQL

10.5.1. sql

select name
   from (
 select iter.pos, split_part(src.name,',',iter.pos) as name
   from (select id as pos from t10) iter,
        (select cast(name as text) as name from v) src
  where iter.pos <=
         length(src.name)-length(replace(src.name,',',''))+1
        ) x
  where pos = 2

10.6. MySQL

10.6.1. sql

select name
   from (
 select iter.pos,
        substring_index(
        substring_index(src.name,',',iter.pos),',',-1) name
   from V src,
        (select id pos from t10) iter,
  where iter.pos <=
        length(src.name)-length(replace(src.name,',',''))
        ) x
 where pos = 2

10.7. SQL Server

10.7.1. sql

select substring(c,2,charindex(',',c,2)-2)
   from (
 select pos, name, substring(name, pos, len(name)) as c,
        row_number() over(
         partition by name
         order by len(substring(name,pos,len(name))) desc) rn
   from (
 select ',' + csv.name + ',' as name,
         iter.pos
   from V csv,
        (select id as pos from t100 ) iter
  where iter.pos <= len(csv.name)+2
        ) x
  where len(substring(name,pos,len(name))) > 1
    and substring(substring(name,pos,len(name)),1,1) = ','
        ) y
  where rn = 2

11. 解析IP地址

11.1. 111.22.3.4

11.2. sql

A     B     C     D
----- ----- ----- ---
111   22    3     4

11.3. DB2

11.3.1. sql

with x (pos,ip) as (
   values (1,'.92.111.0.222')
   union all
  select pos+1,ip from x where pos+1 <= 20
 )
  select max(case when rn=1 then e end) a,
         max(case when rn=2 then e end) b,
         max(case when rn=3 then e end) c,
         max(case when rn=4 then e end) d
    from (
  select pos,c,d,
         case when posstr(d,'.') > 0 then substr(d,1,posstr(d,'.')-1)
              else d
         end as e,
         row_number() over(order by pos desc) rn
    from (
  select pos, ip,right(ip,pos) as c, substr(right(ip,pos),2) as d
    from x
   where pos <= length(ip)
     and substr(right(ip,pos),1,1) = '.'
        ) x
        ) y

11.4. Oracle

11.4.1. sql

select ip,
       substr(ip, 1, instr(ip,'.')-1 ) a,
       substr(ip, instr(ip,'.')+1,
                   instr(ip,'.',1,2)-instr(ip,'.')-1 ) b,
       substr(ip, instr(ip,'.',1,2)+1,
                   instr(ip,'.',1,3)-instr(ip,'.',1,2)-1 ) c,
       substr(ip, instr(ip,'.',1,3)+1 ) d
  from (select '92.111.0.2' as ip from t1)

11.5. PostgreSQL

11.5.1. sql

select split_part(y.ip,'.',1) as a,
        split_part(y.ip,'.',2) as b,
        split_part(y.ip,'.',3) as c,
        split_part(y.ip,'.',4) as d
   from (select cast('92.111.0.2' as text) as ip from t1) as y

11.6. MySQL

11.6.1. sql

select substring_index(substring_index(y.ip,'.',1),'.',-1) a,
       substring_index(substring_index(y.ip,'.',2),'.',-1) b,
       substring_index(substring_index(y.ip,'.',3),'.',-1) c,
       substring_index(substring_index(y.ip,'.',4),'.',-1) d
  from (select '92.111.0.2' as ip from t1) y

11.7. SQL Server

11.7.1. sql

with x (pos,ip) as (
    select 1 as pos,'.92.111.0.222' as ip from t1
    union all
   select pos+1,ip from x where pos+1 <= 20
  )
  select max(case when rn=1 then e end) a,
         max(case when rn=2 then e end) b,
         max(case when rn=3 then e end) c,
         max(case when rn=4 then e end) d
    from (
  select pos,c,d,
         case when charindex('.',d) > 0
              then substring(d,1,charindex('.',d)-1)
              else d
         end as e,
         row_number() over(order by pos desc) rn
    from (
  select pos, ip,right(ip,pos) as c,
         substring(right(ip,pos),2,len(ip)) as d
    from x
  where pos <= len(ip)
    and substring(right(ip,pos),1,1) = '.'
       ) x
       ) y
举报
评论 0