#!/usr/bin/env python
# coding: utf-8

# In[ ]:


get_ipython().run_line_magic('run', '"/Market Monitoring/eatools"')


# In[ ]:


import fileinput
import matplotlib.pyplot as plt 
import matplotlib.ticker  
import matplotlib.dates
import matplotlib
import matplotlib.pyplot as plt
import pandas.io.sql
import datetime
import numpy as np
from pandas import *
import pandas as pd
import ossaudiodev
from datetime import date, datetime, time, timedelta
inputPath='/dbfs/mnt/mmdatastoresaprd/data/Roger/HSOTC/'


# ### Overview of data sources and calculation of carbon impact on SRMC

# 1. Historical carbon (NZU) price sourced from:
#    - recent data from emsTradepoint
#    - older data from 'NZ ETS' series at https://icapcarbonaction.com/en/ets-prices
# 
# 
# 2. Obtain parameters for different plants:
#    -	heat rate (HR) (GJ/MWh) (source: Table 3-13 of https://www.mbie.govt.nz/assets/2020-thermal-generation-stack-update-report.pdf)
#    
#    -	emission factor (EF) (tonnes CO2 per TJ) (source: Tables A4.1 (gas = 'Weighted Average' and diesel = 'Diesel (10 parts (sulphur) per million)') and A4.2 (coal = latest value (2018)) of https://environment.govt.nz/assets/Publications/Files/new-zealands-greenhouse-gas-inventory-1990-2018-vol-2-annexes_July2020.pdf)
#    
#    
# 3.	For each thermal station calculate: carbonImpost(date) = HR\*(EF/1000\*carbonPrice(date))
# where: 
#    - carbonImpost(date) is a series in \$/MWh
#    - carbonPrice(date) is the price series in \$/tonne of CO2 equivalent
#    - HR, EF as above

# ### Import Carbon Price Series

# In[ ]:


carbonPriceDaily=pd.read_csv(inputPath+'carbonPriceDaily.csv',parse_dates=[0],index_col=[0])


# In[ ]:


carbonPriceDaily.tail()


# In[ ]:


# Convert to monthly 
carbonPrice=carbonPriceDaily.resample('MS').mean()
carbonPrice.tail(12)


# In[ ]:


carbonPrice.plot(figsize=[12,8],marker='.')
plt.ylabel('$NZ/tonne CO2', fontsize=14)
plt.grid()


# ### Determine Carbon Price Impact on SRMC of each thermal plant

# In[ ]:


params=pd.read_csv(inputPath+'carbonFactors.csv',index_col=[0])
params


# In[ ]:


stations=params.index.values.tolist()
stations


# In[ ]:


params=params.to_dict('index')
params


# In[ ]:


CarbonImpost=carbonPrice.copy()

for stn in stations:
    CarbonImpost[stn]=params[stn]['Heat Rate (GJ/MWh)']*(params[stn]['Emission Factor (t CO2/TJ)']/1000*CarbonImpost.carbon)
CarbonImpost.drop(columns=['carbon'],inplace=True)


# In[ ]:


CarbonImpost.tail()


# In[ ]:


ax=CarbonImpost.plot(figsize=[12,8])
plt.ylabel('$/MWh', fontsize=14)
ax.set_title('Carbon Impost', fontsize=20)
plt.grid()


# In[ ]:


#Use Rankines on coal (worst case approx)
CarbonImpost=pd.DataFrame(CarbonImpost.max(axis=1))

#or use CCGT to get a (sort of) lower bound
# CarbonImpost=pd.DataFrame(CarbonImpost.min(axis=1))

CarbonImpost.index.set_names('tradingDateTime', inplace=True)
CarbonImpost.head()


# ### No carbon impact when no thermal generation

# #### Get total thermal plant generation (ignore geothermals for now)

# In[ ]:


def getthermaldata():
    ThermalGen = spark.sql("""
    SELECT
      case
        when DaylightSavingIndicator=='NZST' 
          then CONCAT(TradingDate,' ',TradingPeriodStartTime,' +1200')
        when DaylightSavingIndicator=='NZDT' 
          then CONCAT(TradingDate,' ',TradingPeriodStartTime,' +1300')
      end as tradingDateTime,
      cast(sum(SPDGenerationMegawatts) as float) as ThermalGen
    FROM gold.spdnodal
    where 
        CONCAT(PointOfConnectionCode,' ',UnitCode) in ('HLY2201 HLY1','HLY2201 HLY2','HLY2201 HLY3','HLY2201 HLY4','HLY2201 HLY5','HLY2201 HLY6',
        'JRD1101 JRD0','MKE1101 MKE1','NPL2201 NPL3','SFD2201 SFD21', 'SFD2201 SFD22','SFD2201 SPL0','OTA1101 OTG0','OTA2202 OTC0','SWN2201 SWN0',
        'SWN2201 SWN1','SWN2201 SWN5','WHI2201 WHI0')
        and CaseTypeCode = 'FP'
    group by tradingDateTime
     """).toPandas()
# 
    ThermalGen.tradingDateTime=pd.to_datetime(ThermalGen.tradingDateTime, utc=True).dt.tz_convert(None) #create timezone naive UTC time
    ThermalGen.set_index(['tradingDateTime'],inplace=True)
    ThermalGen.sort_index(inplace=True)
    return ThermalGen


# In[ ]:


ThermalGen=getthermaldata()
ThermalGen.head(10)


# In[ ]:


ThermalGen.tail()


# In[ ]:


#cut at 6/6/22 to keep the same dates as what Roger had
ThermalGen = ThermalGen[:'2022/6/30']
ThermalGen.tail(10)


# In[ ]:


ThermalGen.shape


# In[ ]:


ThermalGen.ThermalGen.sort_values().reset_index(drop=True).head(10000).plot()


# In[ ]:


# percent of trading periods with no thermal gen
ThermalGen[ThermalGen.ThermalGen==0.].count()/ThermalGen.shape[0]*100


# In[ ]:


# Only 0.2% of trading periods have no thermal, 
# however most of them are concentrated in late 2021 (where carbon price is high), 
# so only count trading periods with thermal generation.


# In[ ]:


ThermalGen.plot(figsize=[20,10])
plt.grid()


# In[ ]:


ThermalGen['2021-09-01':'2021-12-31'].plot(figsize=[20,10])#,marker='.')


# In[ ]:


temp=ThermalGen.join(CarbonImpost, how='outer')
temp[0]=temp[0].fillna(method='ffill')


# In[ ]:


temp.plot()
plt.grid()


# In[ ]:


CarbonImpost=ThermalGen.join(CarbonImpost, how='outer')
CarbonImpost[0]=CarbonImpost[0].fillna(method='ffill')


# In[ ]:


CarbonImpost=CarbonImpost[CarbonImpost.ThermalGen.notna()]
CarbonImpost.head()


# In[ ]:


CarbonImpost.plot()


# In[ ]:


# Set impost to zero when no thermal gen
CarbonImpost.loc[CarbonImpost.ThermalGen==0.,0]=0


# In[ ]:


CarbonImpost['2021-09-01':'2021-12-31'].plot(figsize=[20,10])


# In[ ]:


CarbonImpost=CarbonImpost[0]


# In[ ]:


CarbonImpost.plot()


# ### Get renewable generation by Trader (include geothermals for now)

# In[ ]:


# Roger's new - joins to offers table to get participant code (to reduce amount of data returned)

def getRenewableGenJoinOffers():
    RenewableGen = spark.sql("""
    SELECT
      case
        when n.DaylightSavingIndicator=='NZST' 
          then CONCAT(n.TradingDate,' ',n.TradingPeriodStartTime,' +1200')
        when n.DaylightSavingIndicator=='NZDT' 
          then CONCAT(n.TradingDate,' ',n.TradingPeriodStartTime,' +1300')
      end as tradingDateTime,
      ParticipantCode as Trader_Id,
      cast(sum(SPDGenerationMegawatts) as float) as RenewableGen
    FROM gold.spdnodal n
        left join gold.offers o on n.PointOfConnectionCode == o.PointOfConnectionCode AND 
                                   n.UnitCode == o.UnitCode AND
                                   n.TradingDate == o.TradingDate AND
                                   n.TradingPeriodNumber == o.TradingPeriodNumber
    where 
        CONCAT(n.PointOfConnectionCode,' ',n.UnitCode) not in ('HLY2201 HLY1','HLY2201 HLY2','HLY2201 HLY3','HLY2201 HLY4','HLY2201 HLY5','HLY2201 HLY6',
        'JRD1101 JRD0','MKE1101 MKE1','NPL2201 NPL3','SFD2201 SFD21','SFD2201 SFD22','SFD2201 SPL0','OTA1101 OTG0','OTA2202 OTC0','SWN2201 SWN0',
        'SWN2201 SWN1','SWN2201 SWN5','WHI2201 WHI0')
        and n.CaseTypeCode = 'FP'
        and IsLatestSubmissionFlag = 'Y'
        and o.ProductType = 'Energy'
        and o.TrancheNumber = 1
    group by tradingDateTime, Trader_Id
    """).toPandas()

    RenewableGen.tradingDateTime=pd.to_datetime(RenewableGen.tradingDateTime, utc=True).dt.tz_convert(None) #create timezone naive UTC time
    RenewableGen.set_index(['tradingDateTime','Trader_Id'],inplace=True)
    RenewableGen.sort_index(inplace=True)
    return RenewableGen
  

# In[ ]:


RenewableGen=getRenewableGenJoinOffers()


# In[ ]:


RenewableGen.head()


# In[ ]:


RenewableGen.groupby(level=1).count()


# In[ ]:


RenewableGen.head()


# In[ ]:


RenewableGen.tail()


# #### Trim to clean start and end of financial years

# In[ ]:


RenewableGen = RenewableGen['2013-07-01':'2022-06-30 23:30']
RenewableGen.tail()


# In[ ]:


RenewableGen.groupby(level=1).sum().sort_values(by='RenewableGen',ascending=False).plot(kind='bar')


# In[ ]:


RenewableGen.head(13)


# In[ ]:


RenewableGen.shape


# ### Calculate carbon windfall

# In[ ]:


CarbonImpost=RenewableGen.join(CarbonImpost)
CarbonImpost.rename(columns={0:'CarbonImpost'},inplace=True)
CarbonImpost.head(15)


# In[ ]:


CarbonImpost['Dollars']=CarbonImpost.CarbonImpost*CarbonImpost.RenewableGen/2
CarbonImpost.head()


# In[ ]:


CarbonImpost=CarbonImpost.Dollars.unstack()
CarbonImpost.head()


# #### Group into financial years

# In[ ]:


CarbonImpost6M=CarbonImpost.resample('6MS').sum()
CarbonImpost6M


# In[ ]:


CarbonImpost6M.reset_index(inplace=True)
CarbonImpost6M


# In[ ]:


finYrStrt=CarbonImpost6M.loc[CarbonImpost6M.index.map(lambda x: x%2==0)][['tradingDateTime']].reset_index(drop=True)
finYrStrt


# In[ ]:


# CarbonImpost6M['group']=[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12]
CarbonImpost6M['group']=[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8]
CarbonImpost6M


# In[ ]:


CarbonImpost6M=CarbonImpost6M.groupby('group').sum()
CarbonImpost6M


# In[ ]:


CarbonImpost6M=CarbonImpost6M.join(finYrStrt).set_index('tradingDateTime')
CarbonImpost6M


# In[ ]:


CarbonImpost6M.rename(index=lambda x: str(x.year)+'-'+str(x.year+1), inplace=True)


# In[ ]:


CarbonImpost6M


# In[ ]:


CarbonImpost6M.sum().sort_values()


# In[ ]:


CarbonImpost6M.fillna(value=0.,inplace=True)
CarbonImpost6M['Total'] = CarbonImpost6M.sum(axis=1)
CarbonImpost6M['Big4']=CarbonImpost6M[['MERI','GENE','CTCT','MRPL']].sum(axis=1)
CarbonImpost6M['Other']=CarbonImpost6M['Total']-CarbonImpost6M['Big4']


# In[ ]:


CarbonImpost6M


# In[ ]:


ax=(CarbonImpost6M[['GENE','Other','MRPL','CTCT','MERI']]/1e6).plot(kind='bar',figsize=[20,10])
ax.set_ylabel('Million $',fontsize=18)
ax.set_xlabel('Financial Year',fontsize=18)
plt.yticks(fontsize=14)
plt.xticks(fontsize=14)
ax.set_title('Estimated annual windfall gains due to carbon price',fontsize=24)
plt.grid(axis='y')


# In[ ]: