Python 문자열

문자열 생성

# Double and single quotes create identical strings.
s = "Hello"
s = 'Hello'
# Triple quotes let a literal span several lines; the newline is part of the value.
s = """여러 줄
문자열"""
s = r"C:\Users\path"   # raw string: backslashes are kept literally (no escape processing)

문자열 연산

"a" + "b"       # "ab" (concatenation)
"a" * 3         # "aaa" (repetition)
len("hello")    # 5 (length)

"el" in "hello"     # True (substring test)
"el" not in "hello" # False

인덱싱과 슬라이싱

s = "Hello World"

# Indexing
s[0]      # 'H'
s[-1]     # 'd' (last character)

# Slicing [start:end:step] -- end is exclusive
s[0:5]    # 'Hello'
s[6:]     # 'World'
s[:5]     # 'Hello'
s[::2]    # 'HloWrd' (every 2nd character)
s[::-1]   # 'dlroW olleH' (reversed)

# More slicing examples
s[1:7:2]  # 'el ' (indices 1, 3, 5)
s[-5:]    # 'World' (last 5 characters)

문자열 포매팅

name, age = "Alice", 25

# 1. f-string (recommended)
f"Hello, {name}! Age: {age}"  # "Hello, Alice! Age: 25"
f"{age:05d}"      # "00025" (zero-padded to width 5)
f"{3.14159:.2f}"  # "3.14" (2 decimal places)
f"{name:>10}"     # "     Alice" (right-aligned)
f"{name:<10}"     # "Alice     " (left-aligned)
f"{name:^10}"     # "  Alice   " (centered)
f"{name:=^10}"    # "==Alice===" (centered, padded with '=')

# 2. str.format()
"Hello, {}!".format(name)       # "Hello, Alice!" (automatic positional field)
"{0}, {1}".format("a", "b")     # "a, b" (explicit positional indices)
"{name}".format(name="Alice")   # "Alice" (keyword field)

# 3. % formatting (legacy)
"number: %d" % 10       # %d: integer
"string: %s" % "hi"     # %s: string
"float: %.2f" % 3.14    # %.2f: float with 2 decimal places
"%c" % 97               # 'a' (character for code point 97)
"%10s" % "hi"           # "        hi" (right-aligned, width 10)
"%-10s" % "hi"          # "hi        " (left-aligned, width 10)

대소문자 변환

s = "Hello World"

s.upper()         # "HELLO WORLD"
s.lower()         # "hello world"
s.capitalize()    # "Hello world" (first character upper, the rest lower)
s.title()         # "Hello World" (first letter of each word upper)
s.swapcase()      # "hELLO wORLD"

# Case predicates
s.isupper()       # False
s.islower()       # False
s.istitle()       # True

검색과 치환

s = "hello world hello"

# Searching
s.find("world")       # 6 (index of first match; -1 if not found)
s.index("world")      # 6 (like find, but raises ValueError if not found)
s.rfind("hello")      # 12 (index of the last match)
s.count("hello")      # 2 (number of non-overlapping occurrences)

# Prefix / suffix checks
s.startswith("hello") # True
s.endswith("hello")   # True
s.startswith(("he", "wo"))  # True (a tuple tests several prefixes at once)

# Replacing
s.replace("hello", "hi")       # "hi world hi"
s.replace("hello", "hi", 1)    # "hi world hello" (first occurrence only)

공백 처리

s = "  hello world  "

s.strip()         # "hello world" (both ends)
s.lstrip()        # "hello world  " (left end only)
s.rstrip()        # "  hello world" (right end only)

# With an argument, the given characters are stripped instead of whitespace.
s.strip("x")      # "  hello world  " (unchanged: s has no leading/trailing 'x')
"xxhelloxx".strip("x")  # "hello"

분리와 결합

# Splitting
"a,b,c".split(",")           # ['a', 'b', 'c']
"a b c".split()              # ['a', 'b', 'c'] (no argument: split on whitespace)
"a,b,c".split(",", 1)        # ['a', 'b,c'] (at most 1 split)
"a\nb\nc".splitlines()       # ['a', 'b', 'c'] (split at line boundaries)

# Regex splitting (re.split)
import re
re.split(r'[,;]', 'a,b;c')   # ['a', 'b', 'c'] (several delimiters)
re.split(r'\s+', 'a   b  c') # ['a', 'b', 'c'] (runs of whitespace)

# Joining
",".join(["a", "b", "c"])    # "a,b,c"
" ".join(["hello", "world"]) # "hello world"
"".join(["a", "b", "c"])     # "abc"

문자열 판별

"abc".isalpha()       # True (letters only)
"123".isdigit()       # True (digits only)
"123".isnumeric()     # True (numeric characters; also accepts other Unicode numerics)
"abc123".isalnum()    # True (letters and/or digits only)
"   ".isspace()       # True (whitespace only)
"abc".isascii()       # True (ASCII characters only)

정렬과 채우기

s = "hello"

# Pad with spaces to a total width of 10.
s.center(10)      # "  hello   "
s.ljust(10)       # "hello     "
s.rjust(10)       # "     hello"

s.center(10, "-") # "--hello---" (custom fill character)
s.zfill(10)       # "00000hello" (left-pad with zeros)
"42".zfill(5)     # "00042"

인코딩/디코딩

# str -> bytes: the resulting byte sequence depends on the codec.
# bytes -> str: always decode with the same codec that produced the bytes.
s = "한글"
b = s.encode("utf-8")     # b'\xed\x95\x9c\xea\xb8\x80'
b.decode("utf-8")         # "한글"

b = s.encode("euc-kr")    # b'\xc7\xd1\xb1\xdb'
b.decode("euc-kr")        # "한글" (decoding these bytes as UTF-8 raises UnicodeDecodeError)

# Code points
ord("A")                  # 65
chr(65)                   # "A"

정규식 기초 (re 모듈)

import re

text = "email: test@example.com, phone: 010-1234-5678"

# Searching
re.search(r"\d+", text)           # Match object for the first match ('010')
re.findall(r"\d+", text)          # ['010', '1234', '5678']

# Substitution
re.sub(r"\d", "*", text)          # every digit replaced with '*'

# Splitting
re.split(r"[,:]", text)           # split on ',' or ':'

# Matching
re.match(r"email", text)          # matches only at the start of the string

# Common pattern elements
r"\d"       # digit
r"\w"       # word character (letter, digit, _)
r"\s"       # whitespace
r"."        # any character (newline excluded by default)
r"+"        # one or more of the preceding element
r"*"        # zero or more of the preceding element
r"?"        # zero or one of the preceding element
r"[a-z]"    # character range
r"^"        # start
r"$"        # end

유용한 패턴

# Reverse a string
"hello"[::-1]             # "olleh"

# Count character frequencies
from collections import Counter
Counter("hello")          # Counter({'l': 2, 'h': 1, 'e': 1, 'o': 1})

# Split on several delimiters
import re
re.split(r"[,;\s]+", "a,b;c d")  # ['a', 'b', 'c', 'd']

# Case-insensitive comparison (str.casefold() is the more thorough choice for non-ASCII text)
"Hello".lower() == "hello".lower()  # True

# Keep only certain characters
"".join(c for c in "a1b2c3" if c.isalpha())  # "abc"

# 문자열 반복 체크
def is_repeated(s):
    """Return True if ``s`` is a shorter substring repeated two or more times.

    Doubling ``s`` and dropping the first and last characters removes the two
    trivial occurrences of ``s`` (at offsets 0 and ``len(s)``). Any occurrence
    that remains starts at an offset strictly between those two, which is only
    possible when ``s`` is built from a repeating unit.

    The empty string has no non-empty repeating unit, so it is reported as
    not repeated.
    """
    if not s:
        # Without this guard the check below would be vacuously True ("" in "").
        return False
    return s in (s + s)[1:-1]


is_repeated("abab")  # True