728x90
3주 차... 코드는 반복하는 게 맞는 거 같은데... 자꾸 복붙 하라네....
DART에서 오픈 API 인증키 받는 법
!pip install dart-fss
Dart-fss 라이브러리 설치
import dart_fss as dart_fss
import pandas as pd
# Read the DART OpenAPI key from the environment instead of embedding the
# credential in the notebook, where it leaks to anyone who can read the file.
import os

api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError('Set the DART_API_KEY environment variable to your DART OpenAPI key.')
dart_fss.set_api_key(api_key=api_key)
# Fetch the company list; the trailing expression displays it in the notebook.
corp_list = dart_fss.get_corp_list()
corp_list.corps
DART의 OpenAPI 키를 발급받고, 키 넣기
# Fetch the corp-code records for all companies.
# NOTE(review): `all` shadows the Python builtin of the same name; the name is
# kept because the next cell builds a DataFrame from this variable.
all = dart_fss.api.filings.get_corp_code()
# Show the first record.
all[0]
전체 종목을 보는 코드
# Turn the raw records into a table and keep the rows that carry a stock_code.
df = pd.DataFrame(all)
has_stock_code = df['stock_code'].notnull()
df_listed = df[has_stock_code]
df_listed.head()
stock_code가 있는 종목은 상장사,
stock_code가 없는 종목은 비상장사를 의미한다.
df_listed = df[df['stock_code'].notnull()]
df_non_listed = df[df['stock_code'].isnull()] 로 코드를 구분한다.
# Per-column counts for the listed companies.
df_listed.count()
# Rows without a stock_code form the non-listed table.
no_stock_code = df['stock_code'].isnull()
df_non_listed = df[no_stock_code]
df_non_listed.head()
df_non_listed.count()
corp_code 99109
corp_name 99109
stock_code 0
modify_date 99109
dtype: int64
# Write both tables out as Excel workbooks.
for frame, path in ((df_listed, '상장종목.xlsx'), (df_non_listed, '비상장종목.xlsx')):
    frame.to_excel(path)
# corp_code is the first column of the first row whose corp_name matches.
samsung_rows = df_listed[df_listed['corp_name'].eq('삼성전자')]
corp_code = samsung_rows.iloc[0, 0]
corp_code
]
# Resolve Kakao's corp_code, then request its company profile.
kakao_rows = df_listed[df_listed['corp_name'].eq('카카오')]
corp_code = kakao_rows.iloc[0, 0]
dart_fss.api.filings.get_corp_info(corp_code)
{'status': '000',
'message': '정상',
'corp_code': '00258801',
'corp_name': '(주)카카오',
'corp_name_eng': 'Kakao Corp.',
'stock_name': '카카오',
'stock_code': '035720',
'ceo_nm': '홍은택 대표이사',
'corp_cls': 'Y',
'jurir_no': '1101111129497',
'bizr_no': '1208147521',
'adres': '제주특별자치도 제주시 첨단로 242',
'hm_url': 'www.kakaocorp.com',
'ir_url': 'https://www.kakaocorp.com/ir/main',
'phn_no': '02-6718-1082',
'fax_no': '02-6718-3647',
'induty_code': '63120',
'est_dt': '19950216',
'acc_mt': '12'}
기업 정보
dart_fss.api.info.unrst_exctv_mendng_sttus(corp_code, '2022', '11011')
{'status': '000',
'message': '정상',
'list': [{'rcept_no': '20230320001096',
'corp_cls': 'Y',
'corp_code': '00258801',
'corp_name': '카카오',
'se': '미등기임원',
'fyer_salary_totamt': '9,471,000,000',
'jan_salary_am': '592,000,000',
'nmpr': '16',
'rm': '-'}]}
# Total pay of unregistered executives: raw rows first, then the same rows as a table.
query_args = (corp_code, '2022', '11011')
data = dart_fss.api.info.unrst_exctv_mendng_sttus(*query_args)
data['list']
pd.DataFrame(data['list'])
미등기임원 보수 총액
# Capital increase/decrease status for the current corp_code, shown as a table.
data = dart_fss.api.info.irds_sttus(corp_code, '2022', '11011')
irds_rows = data['list']
pd.DataFrame(irds_rows)
증자(감자)현황
# Dividend details for the current corp_code, shown as a table.
data = dart_fss.api.info.alot_matter(corp_code, '2022', '11011')
dividend_rows = data['list']
pd.DataFrame(dividend_rows)
배당 현황
# Largest-shareholder status for the current corp_code, shown as a table.
data = dart_fss.api.info.hyslr_sttus(corp_code, '2022', '11011')
holder_rows = data['list']
pd.DataFrame(holder_rows)
최대주주 현황
# Executive roster for Kakao.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.exctv_sttus(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
임원 사항
# Employee status for Kakao.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.emp_sttus(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
직원 현황
# Director remuneration for Kakao.
kakao_rows = df_listed[df_listed['corp_name'].eq('카카오')]
corp_code = kakao_rows.iloc[0, 0]
data = dart_fss.api.info.hmv_audit_indvdl_by_sttus(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
이사 보수
# Top individual pay records for Kakao.
kakao_rows = df_listed[df_listed['corp_name'].eq('카카오')]
corp_code = kakao_rows.iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
연봉 top 5
# Kakao's investments in other corporations.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.otr_cpr_invstmnt_sttus(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
타법인 출자 현황
# Financial statement accounts for Kakao.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.finance.fnltt_singl_acnt(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
상장기업 재무정보
# Shareholder reports for Kakao.
kakao_rows = df_listed[df_listed['corp_name'].eq('카카오')]
corp_code = kakao_rows.iloc[0, 0]
data = dart_fss.api.shareholder.elestock(corp_code)
pd.DataFrame(data['list'])
# Same shareholder query, narrowed to the rows whose reporter (repror) is 김범수.
# Note that this cell rebinds the notebook-level `df`.
kakao_rows = df_listed[df_listed['corp_name'].eq('카카오')]
corp_code = kakao_rows.iloc[0, 0]
data = dart_fss.api.shareholder.elestock(corp_code)
df = pd.DataFrame(data['list'])
by_reporter = df['repror'] == '김범수'
df[by_reporter]
주주정보
상장 종목 분석
연봉 Top 50
# Start with a single company: individual pay records for Kakao.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
우선 한 종목 살펴보기
# Keep only the four columns needed from the pay records.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
wanted = ['corp_name', 'nm', 'ofcps', 'mendng_totamt']
df = pd.DataFrame(data['list'])
df = df[wanted]
df
데이터 표에서 필요한 항목만 가져오기
# Select the needed columns and give them Korean display names.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
wanted = ['corp_name', 'nm', 'ofcps', 'mendng_totamt']
df = pd.DataFrame(data['list'])
df = df[wanted]
df.columns = ['기업명','이름', '역할', '보수']
df
항목이름을 알기 쉽게 한국어로...
# Same table, with the thousands separators stripped from the pay strings.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
wanted = ['corp_name', 'nm', 'ofcps', 'mendng_totamt']
df = pd.DataFrame(data['list'])
df = df[wanted]
df.columns = ['기업명','이름', '역할', '보수']
pay_without_commas = df['보수'].str.replace(',', '')
df['보수'] = pay_without_commas
df
금액이 문자열로 되어 있으니 숫자로 바꿔 줘야 한다.
우선, 쉼표 제거하기
# Same table, with the pay column converted from text to numbers.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
wanted = ['corp_name', 'nm', 'ofcps', 'mendng_totamt']
df = pd.DataFrame(data['list'])
df = df[wanted]
df.columns = ['기업명','이름', '역할', '보수']
pay_without_commas = df['보수'].str.replace(',', '')
df['보수'] = pd.to_numeric(pay_without_commas)
df
# Rebuild the pay table and inspect the column dtypes after the numeric conversion.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
wanted = ['corp_name', 'nm', 'ofcps', 'mendng_totamt']
df = pd.DataFrame(data['list'])
df = df[wanted]
df.columns = ['기업명','이름', '역할', '보수']
pay_without_commas = df['보수'].str.replace(',', '')
df['보수'] = pd.to_numeric(pay_without_commas)
df.dtypes
기업명 object
이름 object
역할 object
보수 int64
dtype: object
금액을 숫자로 바꿔주기
# Rebuild the pay table and show it ordered from highest to lowest pay.
is_kakao = df_listed['corp_name'] == '카카오'
corp_code = df_listed[is_kakao].iloc[0, 0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')
wanted = ['corp_name', 'nm', 'ofcps', 'mendng_totamt']
df = pd.DataFrame(data['list'])
df = df[wanted]
df.columns = ['기업명','이름', '역할', '보수']
pay_without_commas = df['보수'].str.replace(',', '')
df['보수'] = pd.to_numeric(pay_without_commas)
df.sort_values(by='보수', ascending=False)
정렬시키기
def get_salary(name, year='2022', report_code='11011'):
    """Return the individual pay records disclosed by one listed company.

    The company is looked up by exact ``corp_name`` in the notebook-level
    ``df_listed`` table, and ``dart_fss.api.info.indvdl_by_pay`` is queried
    with its corp_code.

    Args:
        name: Company name to match against ``df_listed['corp_name']``.
        year: Business year sent to the API; defaults to '2022', the value
            that used to be hard-coded.
        report_code: Report code sent to the API; defaults to '11011', the
            value that used to be hard-coded.

    Returns:
        DataFrame with columns 기업명, 이름, 역할, 보수, where 보수 has been
        converted from a comma-grouped string to a number.

    Raises:
        IndexError: If ``name`` is not present in ``df_listed``. This is the
            same type the unguarded ``.iloc[0, 0]`` lookup raised; only the
            message is new.
    """
    matches = df_listed[df_listed['corp_name'] == name]
    if matches.empty:
        raise IndexError(f'no listed company named {name!r}')
    corp_code = matches.iloc[0, 0]
    data = dart_fss.api.info.indvdl_by_pay(corp_code, year, report_code)
    # Work on an independent copy so the column writes below never target a
    # slice of the raw response frame.
    df = pd.DataFrame(data['list'])[['corp_name', 'nm', 'ofcps', 'mendng_totamt']].copy()
    df.columns = ['기업명', '이름', '역할', '보수']
    df['보수'] = pd.to_numeric(df['보수'].str.replace(',', ''))
    return df
한 종목에서 필요한 부분을 얻을 수 있게 만들었으니,
다른 종목도 넣어 볼 수 있게 함수로 만들어 주기
get_salary('삼성전자')
# Company names to query; any name whose lookup or API call fails is reported
# and skipped.
names = ['삼성전자','LG에너지솔루션','SK하이닉스','NAVER','삼성바이오로직스','삼성전자우','카카오','삼성SDI','현대차','LG화학','기아','POSCO홀딩스','KB금융','카카오뱅크','셀트리온','신한지주','삼성물산','현대모비스','SK이노베이션','LG전자','카카오페이','SK','한국전력','크래프톤','하나금융지주','LG생활건강','HMM','삼성생명','하이브','두산중공업','SK텔레콤','삼성전기','SK바이오사이언스','LG','S-Oil','고려아연','KT&G','우리금융지주','대한항공','삼성에스디에스','현대중공업','엔씨소프트','삼성화재','아모레퍼시픽','KT','포스코케미칼','넷마블','SK아이이테크놀로지','LG이노텍','기업은행']
dfs = []
for name in names:
    try:
        df = get_salary(name)
    except Exception as err:
        # `except Exception` instead of a bare `except:` so that an interrupt
        # can still stop this long-running loop; the reason is printed too.
        print(f'error - {name}: {err!r}')
    else:
        dfs.append(df)
# pd.concat raises on an empty list; fall back to an empty frame with the
# columns get_salary produces if every lookup failed.
df_result = pd.concat(dfs) if dfs else pd.DataFrame(columns=['기업명', '이름', '역할', '보수'])
df_result
여러 종목을 대입해 볼 수 있게 for 문으로 돌려 준다.
df_result.sort_values(by='보수', ascending=False).head(30)
최대 주주의 주식 변동
# Largest-shareholder rows for Kakao, loaded into `df`.
kakao_rows = df_listed[df_listed['corp_name'].eq('카카오')]
corp_code = kakao_rows.iloc[0, 0]
data = dart_fss.api.info.hyslr_sttus(corp_code, '2022', '11011')
df = pd.DataFrame(data['list'])
# Top three Kakao shareholders by opening stake.
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0, 0]
data = dart_fss.api.info.hyslr_sttus(corp_code, '2022', '11011')
df = pd.DataFrame(data['list'])
df = df[['corp_name', 'nm', 'relate', 'bsis_posesn_stock_qota_rt', 'trmend_posesn_stock_qota_rt', 'rm']]
df.columns = ['기업명', '이름', '관계', '기초지분율', '기말지분율', '비고']
# .copy() makes the filtered rows an independent frame, so the numeric
# conversions below do not write into a slice (SettingWithCopyWarning).
df = df[df['관계'].notnull()].copy()
df['기초지분율'] = pd.to_numeric(df['기초지분율'])
df['기말지분율'] = pd.to_numeric(df['기말지분율'])
df.sort_values(by='기초지분율', ascending=False).head(3)
함수로 만들기
def get_shareholders(corp_code, year='2022', report_code='11011'):
    """Return the three largest shareholder rows of a company by opening stake.

    Queries ``dart_fss.api.info.hyslr_sttus`` for ``corp_code``, keeps the
    rows whose ``relate`` field is not null, and converts both stake columns
    to numbers.

    Args:
        corp_code: DART corp_code of the company.
        year: Business year sent to the API; defaults to '2022', the value
            that used to be hard-coded.
        report_code: Report code sent to the API; defaults to '11011', the
            value that used to be hard-coded.

    Returns:
        DataFrame with columns 기업명, 이름, 관계, 기초지분율, 기말지분율, 비고,
        sorted by 기초지분율 in descending order and cut to at most 3 rows.
    """
    data = dart_fss.api.info.hyslr_sttus(corp_code, year, report_code)
    df = pd.DataFrame(data['list'])
    df = df[['corp_name', 'nm', 'relate', 'bsis_posesn_stock_qota_rt', 'trmend_posesn_stock_qota_rt', 'rm']]
    df.columns = ['기업명', '이름', '관계', '기초지분율', '기말지분율', '비고']
    # .copy() so the conversions below write to an independent frame rather
    # than to a boolean-filtered slice (SettingWithCopyWarning).
    df = df[df['관계'].notnull()].copy()
    for col in ('기초지분율', '기말지분율'):
        df[col] = pd.to_numeric(df[col])
    return df.sort_values(by='기초지분율', ascending=False).head(3)
get_shareholders('00258801')
# corp_code values of ten randomly sampled listed companies.
sampled = df_listed.sample(10)
corp_codes = list(sampled['corp_code'])
corp_codes
['01766167',
'00163318',
'00330044',
'00142865',
'00173032',
'00275260',
'00369107',
'01199550',
'00138376',
'01359736']
list = 반복문으로
# Collect the top shareholders of ten randomly sampled listed companies and
# rank the rows by how much each stake changed over the period.
corp_codes = list(df_listed.sample(10)['corp_code'])
dfs = []
for corp_code in corp_codes:
    try:
        df = get_shareholders(corp_code)
    except Exception as err:
        # `except Exception` instead of a bare `except:` so that an interrupt
        # can still stop the loop; the reason is printed too.
        print(f'error - {corp_code}: {err!r}')
    else:
        dfs.append(df)
# pd.concat raises on an empty list; fall back to an empty frame with the
# columns get_shareholders produces if every request failed.
df_result = pd.concat(dfs) if dfs else pd.DataFrame(
    columns=['기업명', '이름', '관계', '기초지분율', '기말지분율', '비고'])
df_result['증감'] = df_result['기말지분율'] - df_result['기초지분율']
df_result.sort_values(by='증감', ascending=False)
위 데이터는 상장 종목 중 sample(10)개만 분석한 것
아래는 모든 데이터를 분석하고 상위 10개만...
하고 싶었는데 시간이 너무 오래 걸림
코드 정리 후 다시 실습
수익이 많이 난 회사 찾기
!pip install dart-fss
Collecting dart-fss
Downloading dart_fss-0.4.4-py3-none-any.whl (141 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 141.2/141.2 kB 1.2 MB/s eta 0:00:00
Collecting xmltodict (from dart-fss)
Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from dart-fss) (2.31.0)
Collecting arelle-release (from dart-fss)
Downloading arelle_release-2.13.9-py3-none-any.whl (8.1 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.1/8.1 MB 24.2 MB/s eta 0:00:00
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from dart-fss) (1.23.5)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from dart-fss) (1.5.3)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from dart-fss) (4.66.1)
Collecting halo (from dart-fss)
Downloading halo-0.0.31.tar.gz (11 kB)
Preparing metadata (setup.py) ... done
Collecting fake-useragent>=1.0 (from dart-fss)
Downloading fake_useragent-1.2.1-py3-none-any.whl (14 kB)
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from dart-fss) (4.11.2)
Requirement already satisfied: appdirs in /usr/local/lib/python3.10/dist-packages (from dart-fss) (1.4.4)
Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (2023.7.22)
Collecting isodate==0.* (from arelle-release->dart-fss)
Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 41.7/41.7 kB 4.2 MB/s eta 0:00:00
Requirement already satisfied: lxml==4.* in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (4.9.3)
Requirement already satisfied: openpyxl==3.* in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (3.1.2)
Requirement already satisfied: pyparsing==3.* in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (3.1.1)
Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (2023.6.3)
Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from isodate==0.*->arelle-release->dart-fss) (1.16.0)
Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl==3.*->arelle-release->dart-fss) (1.1.0)
Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->dart-fss) (2.5)
Collecting log_symbols>=0.0.14 (from halo->dart-fss)
Downloading log_symbols-0.0.14-py3-none-any.whl (3.1 kB)
Collecting spinners>=0.0.24 (from halo->dart-fss)
Downloading spinners-0.0.24-py3-none-any.whl (5.5 kB)
Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from halo->dart-fss) (2.3.0)
Collecting colorama>=0.3.9 (from halo->dart-fss)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->dart-fss) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->dart-fss) (2023.3.post1)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->dart-fss) (3.2.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->dart-fss) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->dart-fss) (2.0.4)
Building wheels for collected packages: halo
Building wheel for halo (setup.py) ... done
Created wheel for halo: filename=halo-0.0.31-py3-none-any.whl size=11235 sha256=9569424e4edf900cb5092c90124aa72c0282e251b512ae63feb40ecfbc31e27b
Stored in directory: /root/.cache/pip/wheels/5a/d9/8a/b4f14c44aba7c164d4379eca6f1dde59360050406b1edaec24
Successfully built halo
Installing collected packages: spinners, fake-useragent, xmltodict, isodate, colorama, log_symbols, arelle-release, halo, dart-fss
Successfully installed arelle-release-2.13.9 colorama-0.4.6 dart-fss-0.4.4 fake-useragent-1.2.1 halo-0.0.31 isodate-0.6.1 log_symbols-0.0.14 spinners-0.0.24 xmltodict-0.13.0
import dart_fss as dart_fss
import pandas as pd
# Read the DART OpenAPI key from the environment instead of embedding the
# credential in the notebook, where it leaks to anyone who can read the file.
import os

api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError('Set the DART_API_KEY environment variable to your DART OpenAPI key.')
dart_fss.set_api_key(api_key=api_key)
# `corp_rows` rather than `all`, so the builtin all() is not shadowed.
corp_rows = dart_fss.api.filings.get_corp_code()
df = pd.DataFrame(corp_rows)
# Rows with a stock_code are listed companies; rows without are non-listed.
df_listed = df[df['stock_code'].notnull()]
df_non_listed = df[df['stock_code'].isnull()]
CFS:연결재무제표, OFS:재무제표
def get_profit(name, year='2022', report_code='11011'):
    """Return the change in consolidated retained earnings of a listed company.

    Looks up ``name`` in the notebook-level ``df_listed`` table, requests
    ``dart_fss.api.finance.fnltt_singl_acnt`` and keeps the rows where
    ``fs_div == 'CFS'`` and ``account_nm == '이익잉여금'``.

    Args:
        name: Company name to match against ``df_listed['corp_name']``.
        year: Business year sent to the API; defaults to '2022', the value
            that used to be hard-coded.
        report_code: Report code sent to the API; defaults to '11011', the
            value that used to be hard-coded.

    Returns:
        DataFrame with columns 기업명, 당기, 전기, 증감, 증감율. 증감 is
        당기 - 전기, and 증감율 is 증감 divided by the magnitude of 전기.

    Raises:
        IndexError: If ``name`` is not present in ``df_listed`` (same type the
            unguarded ``.iloc[0, 0]`` lookup raised, with a clearer message).
    """
    matches = df_listed[df_listed['corp_name'] == name]
    if matches.empty:
        raise IndexError(f'no listed company named {name!r}')
    corp_code = matches.iloc[0, 0]
    data = dart_fss.api.finance.fnltt_singl_acnt(corp_code, year, report_code)
    df = pd.DataFrame(data['list'])
    cond = (df['fs_div'] == 'CFS') & (df['account_nm'] == '이익잉여금')
    # .copy() so the column writes below go to an independent frame instead
    # of a boolean-filtered slice (SettingWithCopyWarning).
    df = df.loc[cond, ['thstrm_amount', 'frmtrm_amount']].copy()
    df.insert(0, 'name', name)
    df.columns = ['기업명', '당기', '전기']
    for col in ('당기', '전기'):
        df[col] = pd.to_numeric(df[col].str.replace(',', ''))
    df['증감'] = df['당기'] - df['전기']
    # Keep the sign of the change: taking abs() of the numerator made a fall
    # in retained earnings rank exactly like a rise of the same size when the
    # result is sorted in descending order. Only the base is taken as a
    # magnitude, so a negative prior value does not flip the sign.
    df['증감율'] = df['증감'] / df['전기'].abs()
    return df
get_profit('현대자동차')
# Compute the retained-earnings change for ten randomly sampled listed
# companies and rank them by rate of change.
names = list(df_listed.sample(10)['corp_name'])
dfs = []
for name in names:
    try:
        df = get_profit(name)
    except Exception as err:
        # `except Exception` instead of a bare `except:` so that an interrupt
        # can still stop the loop; the reason is printed too.
        print(f'error - {name}: {err!r}')
    else:
        dfs.append(df)
# pd.concat raises on an empty list; fall back to an empty frame with the
# columns get_profit produces if every request failed.
df_result = pd.concat(dfs) if dfs else pd.DataFrame(
    columns=['기업명', '당기', '전기', '증감', '증감율'])
df_result.sort_values(by='증감율', ascending=False)
비상장종목
import dart_fss as dart_fss
import pandas as pd
# Read the DART OpenAPI key from the environment instead of embedding the
# credential in the notebook, where it leaks to anyone who can read the file.
import os

api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError('Set the DART_API_KEY environment variable to your DART OpenAPI key.')
dart_fss.set_api_key(api_key=api_key)
# `corp_rows` rather than `all`, so the builtin all() is not shadowed.
corp_rows = dart_fss.api.filings.get_corp_code()
df = pd.DataFrame(corp_rows)
# Rows with a stock_code are listed companies; rows without are non-listed.
df_listed = df[df['stock_code'].notnull()]
df_non_listed = df[df['stock_code'].isnull()]
def get_earning(name, year='2022', report_code='11011'):
    """Return three years of consolidated net income for a non-listed company.

    Looks up ``name`` in the notebook-level ``df_non_listed`` table, requests
    ``dart_fss.api.info.alot_matter`` and keeps the row(s) whose ``se`` field
    is '(연결)당기순이익(백만원)'.

    Args:
        name: Company name to match against ``df_non_listed['corp_name']``.
        year: Business year sent to the API, as a numeric string. Defaults to
            '2022', the value that used to be hard-coded.
        report_code: Report code sent to the API; defaults to '11011', the
            value that used to be hard-coded.

    Returns:
        DataFrame with columns 기업명 plus one numeric column for ``year`` and
        each of the two years before it (for the default: '2022', '2021',
        '2020'), taken from ``thstrm``, ``frmtrm`` and ``lwfr`` respectively.

    Raises:
        IndexError: If ``name`` is not present in ``df_non_listed`` (same type
            the unguarded ``.iloc[0, 0]`` lookup raised, with a clearer message).
    """
    matches = df_non_listed[df_non_listed['corp_name'] == name]
    if matches.empty:
        raise IndexError(f'no non-listed company named {name!r}')
    corp_code = matches.iloc[0, 0]
    data = dart_fss.api.info.alot_matter(corp_code, year, report_code)
    df = pd.DataFrame(data['list'])
    # .copy() so the column writes below go to an independent frame instead
    # of a boolean-filtered slice (SettingWithCopyWarning).
    df = df.loc[df['se'] == '(연결)당기순이익(백만원)',
                ['corp_name', 'thstrm', 'frmtrm', 'lwfr']].copy()
    # Label the three value columns after the requested year and the two
    # before it, so the labels stay correct for any `year`.
    base_year = int(year)
    year_cols = [str(base_year - back) for back in range(3)]
    df.columns = ['기업명', *year_cols]
    for col in year_cols:
        df[col] = pd.to_numeric(df[col].str.replace(',', ''))
    return df
get_earning('비바리퍼블리카')
숙제
def get_salary(name, year='2021', report_code='11011'):
    """Return one row with the male and female salary figures of a listed company.

    Looks up ``name`` in the notebook-level ``df_listed`` table, requests
    ``dart_fss.api.info.emp_sttus`` and, for each sex, takes the
    ``jan_salary_am`` value of the last row with that ``sexdstn``.

    Args:
        name: Company name to match against ``df_listed['corp_name']``.
        year: Business year sent to the API; defaults to '2021', the value
            that used to be hard-coded.
        report_code: Report code sent to the API; defaults to '11011', the
            value that used to be hard-coded.

    Returns:
        Single-row DataFrame with columns 기업명, 연봉(남), 연봉(여); the two
        salary columns are converted from comma-grouped strings to numbers.

    Raises:
        IndexError: If ``name`` is not in ``df_listed``, or if the response
            has no row for one of the two sexes.
    """
    matches = df_listed[df_listed['corp_name'] == name]
    if matches.empty:
        raise IndexError(f'no listed company named {name!r}')
    corp_code = matches.iloc[0, 0]
    data = dart_fss.api.info.emp_sttus(corp_code, year, report_code)
    df = pd.DataFrame(data['list'])[['corp_name', 'sexdstn', 'jan_salary_am']]

    def last_salary(sex):
        # Last row for this sex, last selected column (jan_salary_am).
        return df[df['sexdstn'] == sex].iloc[-1, -1]

    # Build the row directly: DataFrame.append was removed in pandas 2.0.
    df_result = pd.DataFrame([{
        '기업명': name,
        '연봉(남)': last_salary('남'),
        '연봉(여)': last_salary('여'),
    }])
    for col in ('연봉(남)', '연봉(여)'):
        df_result[col] = pd.to_numeric(df_result[col].str.replace(',', ''))
    return df_result
get_salary('넥센')
# Compare male and female salary figures for ten randomly sampled listed
# companies, ordered by the male-minus-female difference (smallest first).
names = list(df_listed.sample(10)['corp_name'])
dfs = []
for name in names:
    try:
        df = get_salary(name)
    except Exception as err:
        # `except Exception` instead of a bare `except:` so that an interrupt
        # can still stop the loop; the reason is printed too.
        print(f'error - {name}: {err!r}')
    else:
        dfs.append(df)
# pd.concat raises on an empty list; fall back to an empty frame with the
# columns get_salary produces if every request failed.
df_result = pd.concat(dfs) if dfs else pd.DataFrame(columns=['기업명', '연봉(남)', '연봉(여)'])
df_result['남녀_급여차이'] = df_result['연봉(남)'] - df_result['연봉(여)']
df_result['평균'] = (df_result['연봉(남)'] + df_result['연봉(여)'])/2
df_result.sort_values(by='남녀_급여차이', ascending = True)
여러 번 반복하니깐 눈에 조금 들어오는데.... 반복이 맞는 거 같은 느낌이 확신으로 바뀌어 가는 중...
'교육 후기 > 스파르타코딩클럽_개발일지' 카테고리의 다른 글
주식 데이터를 활용한 파이썬 데이터 분석_2주차 개발일지 (0) | 2023.09.10 |
---|---|
마케터, 기획자를 위한 실전 데이터 분석_2주차 개발일지 (0) | 2023.09.10 |
마케터, 기획자를 위한 실전 데이터 분석_1주차 개발일지 (0) | 2023.09.03 |
주식 데이터를 활용한 파이썬 데이터 분석_1주차 개발일지 (0) | 2023.09.03 |
댓글