본문 바로가기
  • 시간(時間)을 아끼는 방법은 시간을 낭비하지 않는것


교육 후기/스파르타코딩클럽_개발일지

주식 데이터를 활용한 파이썬 데이터 분석_3주차 개발일지

by pk7912 2023. 9. 20.
728x90

3주 차... 코드는 반복하는 게 맞는 거 같은데... 자꾸 복붙 하라네....

 

 

 
DART에서 오픈 API 키 발급받는 법
 
 
 
 
 
 
 
 
 
 
 
 
 
 
!pip install dart-fss
Dart-fss 라이브러리 설치
import os

import dart_fss
import pandas as pd

# Read the OpenDART key from the environment rather than embedding it in a
# notebook that gets published. A key that has already appeared in public
# should be re-issued on the OpenDART site.
api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the DART_API_KEY environment variable to your OpenDART API key.'
    )
dart_fss.set_api_key(api_key=api_key)

# Fetch the company list through dart-fss.
corp_list = dart_fss.get_corp_list()

# Notebook cell output: show the `corps` attribute of the returned object.
corp_list.corps
DART OpenAPI 키를 발급받아 설정하기
 
all = dart_fss.api.filings.get_corp_code()

all[0]
전체 종목을 보는 코드
 
df = pd.DataFrame(all)

df_listed = df[df['stock_code'].notnull()]
df_listed.head()
 

stock_code가 있는 종목은 상장사,

stock_code가 없는 종목은 비상장사를 의미한다.

상장사: df_listed = df[df['stock_code'].notnull()]

비상장사: df_non_listed = df[df['stock_code'].isnull()] 로 코드에서 구분한다.

 

df_listed.count()
df_non_listed = df[df['stock_code'].isnull()]
df_non_listed.head()

df_non_listed.count()
 
corp_code      99109
corp_name      99109
stock_code         0
modify_date    99109
dtype: int64

 

df_listed.to_excel('상장종목.xlsx')
df_non_listed.to_excel('비상장종목.xlsx')

corp_code = df_listed[df_listed['corp_name'] == '삼성전자'].iloc[0,0]
corp_code
 ]
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
dart_fss.api.filings.get_corp_info(corp_code)
 
{'status': '000',
 'message': '정상',
 'corp_code': '00258801',
 'corp_name': '(주)카카오',
 'corp_name_eng': 'Kakao Corp.',
 'stock_name': '카카오',
 'stock_code': '035720',
 'ceo_nm': '홍은택  대표이사',
 'corp_cls': 'Y',
 'jurir_no': '1101111129497',
 'bizr_no': '1208147521',
 'adres': '제주특별자치도 제주시 첨단로 242',
 'hm_url': 'www.kakaocorp.com',
 'ir_url': 'https://www.kakaocorp.com/ir/main',
 'phn_no': '02-6718-1082',
 'fax_no': '02-6718-3647',
 'induty_code': '63120',
 'est_dt': '19950216',
 'acc_mt': '12'}
기업 정보

 

 

 
dart_fss.api.info.unrst_exctv_mendng_sttus(corp_code, '2022', '11011')
 
{'status': '000',
 'message': '정상',
 'list': [{'rcept_no': '20230320001096',
   'corp_cls': 'Y',
   'corp_code': '00258801',
   'corp_name': '카카오',
   'se': '미등기임원',
   'fyer_salary_totamt': '9,471,000,000',
   'jan_salary_am': '592,000,000',
   'nmpr': '16',
   'rm': '-'}]}
 
 
data = dart_fss.api.info.unrst_exctv_mendng_sttus(corp_code, '2022', '11011')
data['list']

pd.DataFrame(data['list'])
미등기임원 보수 총액


 

 

 
data = dart_fss.api.info.irds_sttus(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
증자(감자)현황

 

data = dart_fss.api.info.alot_matter(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
배당 현황
 
 
data = dart_fss.api.info.hyslr_sttus(corp_code, '2022', '11011')
pd.DataFrame(data['list'])
 

최대주주 현황

 

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.exctv_sttus(corp_code, '2022', '11011')

pd.DataFrame(data['list'])
 

임원 사항

 

corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.emp_sttus(corp_code, '2022', '11011')

pd.DataFrame(data['list'])
직원 현황
 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.hmv_audit_indvdl_by_sttus(corp_code, '2022', '11011')

pd.DataFrame(data['list'])
 

이사 보수

 

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

pd.DataFrame(data['list'])
 

연봉 top 5

 

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.otr_cpr_invstmnt_sttus(corp_code, '2022', '11011')

pd.DataFrame(data['list'])
타법인 출자 현황

 

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.finance.fnltt_singl_acnt(corp_code, '2022', '11011')

pd.DataFrame(data['list'])
 

상장기업 재무정보

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.shareholder.elestock(corp_code)

pd.DataFrame(data['list'])
 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.shareholder.elestock(corp_code)

df = pd.DataFrame(data['list'])
df[df['repror'] == '김범수']
주주정보

 

 

상장 종목 분석

 

 

연봉 Top 50

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

pd.DataFrame(data['list'])
 

우선 한 종목 살펴보기

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

df = pd.DataFrame(data['list'])

df = df[['corp_name', 'nm', 'ofcps', 'mendng_totamt']]
df
데이터 표에서 필요한 항목만 가져오기
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

df = pd.DataFrame(data['list'])

df = df[['corp_name', 'nm', 'ofcps', 'mendng_totamt']]
df.columns = ['기업명','이름', '역할', '보수']
df
 

항목이름을 알기 쉽게 한국어로...

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

df = pd.DataFrame(data['list'])

df = df[['corp_name', 'nm', 'ofcps', 'mendng_totamt']]
df.columns = ['기업명','이름', '역할', '보수']
df['보수'] = df['보수'].str.replace(',', '')

df
금액이 문자열로 되어 있으니 숫자로 바꿔 줘야 한다.

우선, 쉼표 제거하기

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

df = pd.DataFrame(data['list'])

df = df[['corp_name', 'nm', 'ofcps', 'mendng_totamt']]
df.columns = ['기업명','이름', '역할', '보수']
df['보수'] = pd.to_numeric(df['보수'].str.replace(',', ''))

df
 
 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

df = pd.DataFrame(data['list'])

df = df[['corp_name', 'nm', 'ofcps', 'mendng_totamt']]
df.columns = ['기업명','이름', '역할', '보수']
df['보수'] = pd.to_numeric(df['보수'].str.replace(',', ''))

df.dtypes
 
기업명    object
이름     object
역할     object
보수      int64
dtype: object
 

금액을 숫자로 바꿔주기

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.indvdl_by_pay(corp_code, '2022', '11011')

df = pd.DataFrame(data['list'])

df = df[['corp_name', 'nm', 'ofcps', 'mendng_totamt']]
df.columns = ['기업명','이름', '역할', '보수']
df['보수'] = pd.to_numeric(df['보수'].str.replace(',', ''))

df.sort_values(by='보수', ascending=False)

정렬시키기

 

 
def get_salary(name, bsns_year='2022', reprt_code='11011'):
    """Return the individual-pay table DART reports for one listed company.

    Args:
        name: Company name, matched exactly against ``df_listed['corp_name']``.
        bsns_year: Business year passed to the API. Defaults to ``'2022'``.
        reprt_code: Report code passed to the API. Defaults to ``'11011'``
            (presumably the annual business report; confirm against the
            OpenDART guide).

    Returns:
        DataFrame with the columns 기업명, 이름, 역할, 보수, where 보수 has
        been converted from a comma-grouped string to a number.

    Raises:
        IndexError: If no row of ``df_listed`` has that ``corp_name``.
    """
    # The first column of the first matching row is the DART corp_code.
    corp_code = df_listed[df_listed['corp_name'] == name].iloc[0, 0]
    data = dart_fss.api.info.indvdl_by_pay(corp_code, bsns_year, reprt_code)

    df = pd.DataFrame(data['list'])[['corp_name', 'nm', 'ofcps', 'mendng_totamt']]
    df.columns = ['기업명', '이름', '역할', '보수']

    # Strip the thousands separators before converting the amount to a number.
    df['보수'] = pd.to_numeric(df['보수'].str.replace(',', ''))

    return df

한 종목에서 필요한 부분을 얻을 수 있게 만들었으니,

다른 종목도 넣어 볼 수 있게 함수로 만들어 주기

 
get_salary('삼성전자')
 
names = ['삼성전자','LG에너지솔루션','SK하이닉스','NAVER','삼성바이오로직스','삼성전자우','카카오','삼성SDI','현대차','LG화학','기아','POSCO홀딩스','KB금융','카카오뱅크','셀트리온','신한지주','삼성물산','현대모비스','SK이노베이션','LG전자','카카오페이','SK','한국전력','크래프톤','하나금융지주','LG생활건강','HMM','삼성생명','하이브','두산중공업','SK텔레콤','삼성전기','SK바이오사이언스','LG','S-Oil','고려아연','KT&G','우리금융지주','대한항공','삼성에스디에스','현대중공업','엔씨소프트','삼성화재','아모레퍼시픽','KT','포스코케미칼','넷마블','SK아이이테크놀로지','LG이노텍','기업은행']

# Collect the pay table of every company in `names`; companies whose lookup
# or API call fails are reported and skipped.
dfs = []
for name in names:
    try:
        df = get_salary(name)
        dfs.append(df)
    except Exception as exc:
        # `Exception` (not a bare except) so Ctrl-C / interpreter exit still
        # propagate; include the cause so failures can be diagnosed.
        print(f'error - {name}: {exc!r}')

# Note: pd.concat raises ValueError if every company failed (empty list).
df_result = pd.concat(dfs)

df_result
여러 종목을 대입해 볼 수 있게 for 문으로 돌려 준다.

 

df_result.sort_values(by='보수', ascending=False).head(30)
 

최대 주주의 주식 변동

 

 
corp_code = df_listed[df_listed['corp_name'] == '카카오'].iloc[0,0]
data = dart_fss.api.info.hyslr_sttus(corp_code, '2022', '11011')

df = pd.DataFrame(data['list'])
 
 
# Largest-shareholder status for Kakao: look up the corp_code, call the API,
# then tidy the table in a single chain.
corp_code = df_listed.loc[df_listed['corp_name'] == '카카오'].iloc[0, 0]
data = dart_fss.api.info.hyslr_sttus(corp_code, '2022', '11011')

df = (
    pd.DataFrame(data['list'])[
        ['corp_name', 'nm', 'relate', 'bsis_posesn_stock_qota_rt', 'trmend_posesn_stock_qota_rt', 'rm']
    ]
    .rename(columns={
        'corp_name': '기업명',
        'nm': '이름',
        'relate': '관계',
        'bsis_posesn_stock_qota_rt': '기초지분율',
        'trmend_posesn_stock_qota_rt': '기말지분율',
        'rm': '비고',
    })
    # Keep only rows that have a relationship value, then convert the ratios.
    .loc[lambda frame: frame['관계'].notnull()]
    .assign(**{
        '기초지분율': lambda frame: pd.to_numeric(frame['기초지분율']),
        '기말지분율': lambda frame: pd.to_numeric(frame['기말지분율']),
    })
)

# Notebook cell output: the three largest opening stakes.
df.sort_values(by='기초지분율', ascending=False).head(3)
 

함수로 만들기

 
 def get_shareholders(corp_code):
    data = dart_fss.api.info.hyslr_sttus(corp_code, '2022', '11011')

    df = pd.DataFrame(data['list'])
    df = df[['corp_name', 'nm', 'relate', 'bsis_posesn_stock_qota_rt', 'trmend_posesn_stock_qota_rt', 'rm']]

    df.columns = ['기업명', '이름', '관계', '기초지분율', '기말지분율', '비고']

    df = df[df['관계'].notnull()]

    df['기초지분율'] = pd.to_numeric(df['기초지분율'])
    df['기말지분율'] = pd.to_numeric(df['기말지분율'])

    return df.sort_values(by='기초지분율', ascending=False).head(3)
 
 
get_shareholders('00258801')
 
corp_codes = list(df_listed.sample(10)['corp_code'])

corp_codes
 
['01766167',
 '00163318',
 '00330044',
 '00142865',
 '00173032',
 '00275260',
 '00369107',
 '01199550',
 '00138376',
 '01359736']
list로 만든 종목 코드를 반복문으로 돌리기

 

# Pick 10 random listed companies and collect their largest shareholders.
corp_codes = list(df_listed.sample(10)['corp_code'])

dfs = []

for corp_code in corp_codes:
    try:
        df = get_shareholders(corp_code)
        dfs.append(df)
    except Exception as exc:
        # `Exception` (not a bare except) so Ctrl-C / interpreter exit still
        # propagate; include the cause so failures can be diagnosed.
        print(f'error - {corp_code}: {exc!r}')

# Note: pd.concat raises ValueError if every company failed (empty list).
df_result = pd.concat(dfs)
# Change in stake over the period: closing ratio minus opening ratio.
df_result['증감'] = df_result['기말지분율'] - df_result['기초지분율']

df_result.sort_values(by='증감', ascending=False)

 
 

위에 데이터는 상장종목 sample(10) 개만

아래는 모든 데이터 분석하고 상위 10개만...

하고 싶었는데 시간 오지게 걸림

 

코드정리 후 다시 실습

 

수익이 많이 난 회사 찾기

!pip install dart-fss
 
Collecting dart-fss
  Downloading dart_fss-0.4.4-py3-none-any.whl (141 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 141.2/141.2 kB 1.2 MB/s eta 0:00:00
Collecting xmltodict (from dart-fss)
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from dart-fss) (2.31.0)
Collecting arelle-release (from dart-fss)
  Downloading arelle_release-2.13.9-py3-none-any.whl (8.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.1/8.1 MB 24.2 MB/s eta 0:00:00
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from dart-fss) (1.23.5)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from dart-fss) (1.5.3)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from dart-fss) (4.66.1)
Collecting halo (from dart-fss)
  Downloading halo-0.0.31.tar.gz (11 kB)
  Preparing metadata (setup.py) ... done
Collecting fake-useragent>=1.0 (from dart-fss)
  Downloading fake_useragent-1.2.1-py3-none-any.whl (14 kB)
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from dart-fss) (4.11.2)
Requirement already satisfied: appdirs in /usr/local/lib/python3.10/dist-packages (from dart-fss) (1.4.4)
Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (2023.7.22)
Collecting isodate==0.* (from arelle-release->dart-fss)
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 41.7/41.7 kB 4.2 MB/s eta 0:00:00
Requirement already satisfied: lxml==4.* in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (4.9.3)
Requirement already satisfied: openpyxl==3.* in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (3.1.2)
Requirement already satisfied: pyparsing==3.* in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (3.1.1)
Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from arelle-release->dart-fss) (2023.6.3)
Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from isodate==0.*->arelle-release->dart-fss) (1.16.0)
Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl==3.*->arelle-release->dart-fss) (1.1.0)
Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->dart-fss) (2.5)
Collecting log_symbols>=0.0.14 (from halo->dart-fss)
  Downloading log_symbols-0.0.14-py3-none-any.whl (3.1 kB)
Collecting spinners>=0.0.24 (from halo->dart-fss)
  Downloading spinners-0.0.24-py3-none-any.whl (5.5 kB)
Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from halo->dart-fss) (2.3.0)
Collecting colorama>=0.3.9 (from halo->dart-fss)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->dart-fss) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->dart-fss) (2023.3.post1)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->dart-fss) (3.2.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->dart-fss) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->dart-fss) (2.0.4)
Building wheels for collected packages: halo
  Building wheel for halo (setup.py) ... done
  Created wheel for halo: filename=halo-0.0.31-py3-none-any.whl size=11235 sha256=9569424e4edf900cb5092c90124aa72c0282e251b512ae63feb40ecfbc31e27b
  Stored in directory: /root/.cache/pip/wheels/5a/d9/8a/b4f14c44aba7c164d4379eca6f1dde59360050406b1edaec24
Successfully built halo
Installing collected packages: spinners, fake-useragent, xmltodict, isodate, colorama, log_symbols, arelle-release, halo, dart-fss
Successfully installed arelle-release-2.13.9 colorama-0.4.6 dart-fss-0.4.4 fake-useragent-1.2.1 halo-0.0.31 isodate-0.6.1 log_symbols-0.0.14 spinners-0.0.24 xmltodict-0.13.0
 
import os

import dart_fss
import pandas as pd

# Read the OpenDART key from the environment rather than embedding it in a
# notebook that gets published. A key that has already appeared in public
# should be re-issued on the OpenDART site.
api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the DART_API_KEY environment variable to your OpenDART API key.'
    )
dart_fss.set_api_key(api_key=api_key)

# One record per company; named `corp_records` so the builtin `all()` is not
# shadowed.
corp_records = dart_fss.api.filings.get_corp_code()

df = pd.DataFrame(corp_records)

# Companies with a stock_code are listed; those without are unlisted.
df_listed = df[df['stock_code'].notnull()]
df_non_listed = df[df['stock_code'].isnull()]
fs_div 값 — CFS: 연결재무제표, OFS: 재무제표(개별)

 

 
def get_profit(name, bsns_year='2022', reprt_code='11011'):
    """Return current vs. previous-term retained earnings for a listed company.

    Only rows of the consolidated statements (``fs_div == 'CFS'``) whose
    account name is exactly ``'이익잉여금'`` are kept.

    Args:
        name: Company name, matched exactly against ``df_listed['corp_name']``.
            It is also written into the 기업명 column of the result.
        bsns_year: Business year passed to the API. Defaults to ``'2022'``.
        reprt_code: Report code passed to the API. Defaults to ``'11011'``.

    Returns:
        DataFrame with the columns 기업명, 당기, 전기, 증감, 증감율. 당기 and
        전기 are numeric; 증감 is 당기 - 전기; 증감율 is |증감| / |전기|, i.e.
        the magnitude of the change only (the sign is carried by 증감).

    Raises:
        IndexError: If no row of ``df_listed`` has that ``corp_name``.
    """
    # The first column of the first matching row is the DART corp_code.
    corp_code = df_listed[df_listed['corp_name'] == name].iloc[0, 0]
    data = dart_fss.api.finance.fnltt_singl_acnt(corp_code, bsns_year, reprt_code)

    accounts = pd.DataFrame(data['list'])
    cond = (accounts['fs_div'] == 'CFS') & (accounts['account_nm'] == '이익잉여금')

    # Select the matching rows and the two amount columns in one step; the
    # copy makes the column assignments below act on an independent frame.
    df = accounts.loc[cond, ['thstrm_amount', 'frmtrm_amount']].copy()
    df.columns = ['당기', '전기']
    df.insert(0, '기업명', name)

    # Strip the thousands separators before converting the amounts to numbers.
    for col in ('당기', '전기'):
        df[col] = pd.to_numeric(df[col].str.replace(',', ''))

    df['증감'] = df['당기'] - df['전기']
    df['증감율'] = abs(df['증감']) / abs(df['전기'])

    return df
 
get_profit('현대자동차')
 
 
# Pick 10 random listed companies and compare their retained earnings.
names = list(df_listed.sample(10)['corp_name'])

dfs = []

for name in names:
    try:
        df = get_profit(name)
        dfs.append(df)
    except Exception as exc:
        # `Exception` (not a bare except) so Ctrl-C / interpreter exit still
        # propagate; include the cause so failures can be diagnosed.
        print(f'error - {name}: {exc!r}')


# Note: pd.concat raises ValueError if every company failed (empty list).
df_result = pd.concat(dfs)
df_result.sort_values(by='증감율', ascending=False)
 
 

비상장종목

 

 
import os

import dart_fss
import pandas as pd

# Read the OpenDART key from the environment rather than embedding it in a
# notebook that gets published. A key that has already appeared in public
# should be re-issued on the OpenDART site.
api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the DART_API_KEY environment variable to your OpenDART API key.'
    )
dart_fss.set_api_key(api_key=api_key)

# One record per company; named `corp_records` so the builtin `all()` is not
# shadowed.
corp_records = dart_fss.api.filings.get_corp_code()
df = pd.DataFrame(corp_records)

# Companies with a stock_code are listed; those without are unlisted.
df_listed = df[df['stock_code'].notnull()]
df_non_listed = df[df['stock_code'].isnull()]
 
def get_earning(name, bsns_year='2022', reprt_code='11011'):
    """Return three years of consolidated net income for an unlisted company.

    The figures come from the dividend-matters API (``alot_matter``), keeping
    only the row whose ``se`` is exactly ``'(연결)당기순이익(백만원)'``.

    Args:
        name: Company name, matched exactly against
            ``df_non_listed['corp_name']``.
        bsns_year: Business year passed to the API. Defaults to ``'2022'``.
            The three value columns are labelled with this year and the two
            years before it.
        reprt_code: Report code passed to the API. Defaults to ``'11011'``.

    Returns:
        DataFrame with the column 기업명 followed by three numeric year
        columns (``'2022'``, ``'2021'``, ``'2020'`` for the default year),
        filled from ``thstrm``, ``frmtrm`` and ``lwfr`` respectively.

    Raises:
        IndexError: If no row of ``df_non_listed`` has that ``corp_name``.
    """
    # The first column of the first matching row is the DART corp_code.
    corp_code = df_non_listed[df_non_listed['corp_name'] == name].iloc[0, 0]
    data = dart_fss.api.info.alot_matter(corp_code, str(bsns_year), reprt_code)
    df = pd.DataFrame(data['list'])

    is_net_income = df['se'] == '(연결)당기순이익(백만원)'
    df = df.loc[is_net_income, ['corp_name', 'thstrm', 'frmtrm', 'lwfr']].copy()

    # Label the value columns from the requested year so they stay correct
    # when a year other than the default is queried.
    base_year = int(bsns_year)
    year_cols = [str(base_year - offset) for offset in range(3)]
    df.columns = ['기업명', *year_cols]

    # Strip the thousands separators before converting the amounts to numbers.
    for col in year_cols:
        df[col] = pd.to_numeric(df[col].str.replace(',', ''))

    return df
 
 
get_earning('비바리퍼블리카')

 

 

숙제

 
def get_salary(name, bsns_year='2021', reprt_code='11011'):
    """Return one row with the male and female salary figures of a company.

    Note: this reuses the name ``get_salary`` of the earlier individual-pay
    helper, so in a single notebook session it replaces that definition.

    Args:
        name: Company name, matched exactly against ``df_listed['corp_name']``.
            It is also written into the 기업명 column of the result.
        bsns_year: Business year passed to the API. Defaults to ``'2021'``.
        reprt_code: Report code passed to the API. Defaults to ``'11011'``.

    Returns:
        Single-row DataFrame with the columns 기업명, 연봉(남), 연봉(여); the
        two salary columns are numeric.

    Raises:
        IndexError: If the company is not in ``df_listed`` or the employee
            table has no row for one of the sexes.
    """
    # The first column of the first matching row is the DART corp_code.
    corp_code = df_listed[df_listed['corp_name'] == name].iloc[0, 0]
    data = dart_fss.api.info.emp_sttus(corp_code, bsns_year, reprt_code)

    df = pd.DataFrame(data['list'])[['corp_name', 'sexdstn', 'jan_salary_am']]

    def last_salary(sex):
        # Last row for that sex, last column (jan_salary_am). Presumably the
        # last row is the per-sex total; confirm against real API output.
        return df[df['sexdstn'] == sex].iloc[-1, -1]

    # Build the row directly: DataFrame.append was removed in pandas 2.0.
    df_result = pd.DataFrame([{
        '기업명': name,
        '연봉(남)': last_salary('남'),
        '연봉(여)': last_salary('여'),
    }])

    # Strip the thousands separators before converting the amounts to numbers.
    for col in ('연봉(남)', '연봉(여)'):
        df_result[col] = pd.to_numeric(df_result[col].str.replace(',', ''))

    return df_result
 
 
get_salary('넥센')
 
 
# Pick 10 random listed companies and compare male vs. female salary figures.
names = list(df_listed.sample(10)['corp_name'])

dfs = []

for name in names:
    try:
        df = get_salary(name)
        dfs.append(df)
    except Exception as exc:
        # `Exception` (not a bare except) so Ctrl-C / interpreter exit still
        # propagate; include the cause so failures can be diagnosed.
        print(f'error - {name}: {exc!r}')

# Note: pd.concat raises ValueError if every company failed (empty list).
df_result = pd.concat(dfs)

df_result['남녀_급여차이'] = df_result['연봉(남)'] - df_result['연봉(여)']
df_result['평균'] = (df_result['연봉(남)'] + df_result['연봉(여)']) / 2

# Smallest gap first.
df_result.sort_values(by='남녀_급여차이', ascending=True)




 
 

여러 번 반복하니깐 눈에 조금 들어오는데.... 반복이 맞는 거 같은 느낌이 확신으로 바뀌어 가는 중...

 

댓글