GOLDEN GATE
#################################################################
Commands
#################################################################
send
EAUSFLD3 status
info EAUSFLD2 , showch
send EAUSFLD2 , showtrans
info EAUSFLD3 , detail
stats EAUSFLD3
-- Use the HISTORY
command to display a list of previously executed commands.
-- Use the ! command
to execute a previous command again without editing it.
-- Use the FC command
to edit a previous command and then execute it again.
echo $GGBASE --
$ORACLE_HOME/ggsci
connect /
dblogin userid gguser , password gguser
pugre old extract --
in param extract file
list table *
info all
info mgr
info manager
info extract
info e01123 , showch
-- Show checkpoints
info extract e_cust
info extract e_cust, detail
info extract e_cust, showch
info extract *, tasks
info extract *, allprocesses
info extract ext1, detail
info e01123 , showch
-- Show checkpoints
info extract e_cust
info extract e_cust, detail
info extract e_cust, showch
info extract *, tasks
info extract *, allprocesses
info exttrail *
info trandata SCOTT.*
GGSCI (server) 6> shell pwd
KILL EXTRACT group name
LAG EXTRACT
lag extract ext*
view param mgr
edit param mgr
start mgr
start <group>
start *
stop er *
info er *
GGSCI> START EXTRACT <extract name>, DETAIL
add trandata amer.*
info trandata amer.*
-info all
–info mgr
–info <group>
–send <group>, status
–info <group>, showch# checkpoint info, with required
arch log #
send <group>, status
send extract ext1 status
send extract ext2, getlag
send manager childstatus
SEND MANAGER GETPURGEOLDEXTRACTS
status manager
status extract ext1
stats extract ext2 reportrate hr
VIEW GGSEVT
view report ext1
view report rep1
view report mgr
SHOWTRANS
–lag <group>
•Use LAG EXTRACT to determine a true lag time between
Extract and the data source. Lag time is the difference in seconds between the
time
A record was processed by Extract and the timestamp of that
record in the data source.
•Use LAG REPLICAT to determine a true lag time between
Replicat and the trail. Lag time is the difference, in seconds, between the
time that the last record was processed by Replicatand the timestamp of the
record in the trail.
–view params<group> (to view the parameter file)
–alter <group>, etrollover(to create/increment a new
trail file)
–alter <group>, extseqno<#>, extrba<rba>
(To designate a begin point in trail file)
–alter <group>, extseqno<#>, extrba1024 (To
start from new archive or online logfiles. Oracle reserves 1024 bytes for file
header info)
–alter <group>, begin now (Begins processing at the
current time)
–alter <group>, begin 2003-12-25 (Begins processing to
the specific datetime)
–refresh mgr(Enables to change the parameter without
stopping and starting the Manager process. The exception is changing the port number)
–send manger, childstatus(Retrieves status information about
processes started by Manager.)
–stats <group> (to display statistics for one or more
groups.)
#################################################################
GG
VIEWS
#################################################################
COLUMN LOG_GROUP_NAME HEADING 'Log
Group' FORMAT A20
COLUMN TABLE_NAME HEADING 'Table'
FORMAT A15
COLUMN ALWAYS HEADING 'Conditional
or|Unconditional' FORMAT A14
COLUMN LOG_GROUP_TYPE HEADING 'Type
of Log Group' FORMAT A20
SELECT
LOG_GROUP_NAME,
TABLE_NAME,
DECODE(ALWAYS,
'ALWAYS', 'Unconditional',
'CONDITIONAL', 'Conditional')
ALWAYS,
LOG_GROUP_TYPE
FROM DBA_LOG_GROUPS;
select SUPPLEMENTAL_LOG_DATA_MIN
from v$database;
SELECT s.inst_id,
s.sid,s.serial#,t.start_time,
s.status
FROM GV$session s, GV$transaction
t, GV$rollstat r
WHERE s.saddr=t.ses_addr
and t.xidusn=r.usn
and s.inst_id=t.inst_id
and t.inst_id=r.inst_id
order by t.start_time;
/*
Conn / as sysdba
Col used format a8
Prompt current_rba will show where redo is
being written to
Select
le.leseq log_sequence#,
cp.cpodr_bno * le.lebsz current_rba,
le.lesiz * le.lebsz log_size,
substr(to_char(100 * cp.cpodr_bno
/ le.lesiz, '999.00'), 2) || '%' used
from sys.x$kcccp cp,
sys.x$kccle le
where le.inst_id =
userenv('Instance') and
cp.inst_id = userenv('Instance')
and
le.leseq = cp.cpodr_seq
and le.leseq > 0
*/
#################################################################
Troubleshoot
#################################################################
main issues :
gap , uncommitted transaction , gg abended , rep errors --
unique key , no transaction ,
checkpoint table corrupted , missing archives ,
--> info all command was not sowing lag after extra
rollover . have to use "send extract status,"
troubleshoot gap in case of missing arcive :
-------------------------------------------------------
-- stop replication
-- export/import > refresh
-- start replication from that csn/time
FOR ERROR CHECK : ggserr.log , tcperrs , view GGSEVT ,
,logdump
Discard file , report file , sqlnet file , check if rba number is changing ,
Go to arch directory : fuser
replicate side : reperror 1401 discard , reperror 0001
discard ,
lsof -p <the process ID for one of the
replicats you have running> -- to
check which process is writing to which file
-- ggserr.log: GG
reports global level INFO, WARNING and ERROR messages in this file at GGBASE
directory.
-- The Report file stores process level messages at
dirrpt/<PROCESS_NAME>.rpt. Each process should have its own rpt file. ex.
E0079.rpt. New report file will be
created each time the process starts and old one will be rolled-over. GG maintains
10 rolled-over files.
-- Discard file stores
the vital RBA information, location of the problematic record within a
transaction and record details. It also
saves all the discarded records, if discard flag (REPERROR) turn on.
Normally discard file is located at
dirout/<PROCESS_NAME_PDB_RDB>.dsc
-- Always communicate with user during troubleshooting
particularly when you planning to skip transaction. User has to approve before
you skip the transaction or a record.
-- The REPERROR flag skips all the subsequent records that
encountered specific error until turned off. No way to skip only one
specific record using this flag.
Replication issues :
-- Most of the troubleshooting techniques due to data
inconsistency between primary and target are similar to primary key issue that covered in Alert 1. You can use the same
tactics to resolve Alert 2 errors..
-- REPERROR flag discards records that encountered specific
Oracle error and saves discarded details in discard file.
-- REPERROR skips only a problematic record/row; not entire
transaction.
-- Most common REPERROR flags:
–REPERROR 0001 discard: Unique Constraint violation. Insert
or update statement fails.
–REPERROR 1403 discard: No data found; update or delete
statement fails.
–REPERROR 2292 discard: Integrity constraint .violated
-child record found. Update or delete fails.
–REPERROR 28117 discard: Integrity constraint violated
-parent record not found. Insert or update fails.
Main issues on
replicate :
Alert 1 –Primary Key Issue
Alert 2 –Data Inconsistency Errors
Alert 3 –Uncommitted transaction
Alert 4 –Lag Checkpoint Exceeded Limit
Alert 5 –Data not moving
Alert 6 –Incompatible Record Format -- fixed from gg 8 – scanforheader
How to sync gg
when when out of sync :
-- stop gg
-- insert using dblink/export import
-- start gg -- wiuth skip errors . remove skip errors when
gg is in sync
or
-- export and start replicate .. ( good if started from csn
number )
Consider MAXCOMMITPROPAGATIONDELAY parameter in extract, if
extract abends on SCN error.
THREADOPTIONS MAXCOMMITPROPAGATIONDELAY 30000, IOLATENCY
3000
#################################################################
Skip Transaction
#################################################################
#################################################################
replicat
paramfile dirprm/rtwflxd3.prm skiptransaction
send extract oow_ex1, showtrans
send extract oow_ex1, skiptrans 5.28.1323
FORCE
send E_CBDS showtrans 688.6.2315775 file tran_688.6.2315775.dmp detail
SEND EXTRACT E_CBDS , SKIPTRANS
688.6.2315775 THREAD 1
#################################################################
#################################################################
Tracing
#################################################################
ggsci> send <rep_name> trace2
./dirrpt/rep_name.trc
wait for 3-4 minutes
ggsci> send <rep_name> trace2 off
--->> get the trace file in rep_name.trc
“send Extract , trace off” for Extract, “send
Replicat , trace off” for Replicat
send extract EXT1 trace
/tmp/trace_me.trc
TRACE | TRACE2
Valid For
Extract and Replicat
Description
Use the TRACE and TRACE2 parameters to
capture Extract or Replicat processing information to help reveal processing
bottlenecks. Both support the tracing of DML and DDL.
Tracing also can be turned on and off by
using the SEND EXTRACT or SEND REPLICAT command in GGSCI. See "SEND
EXTRACT" or "SEND REPLICAT".
Contact Oracle Support for assistance if
the trace reveals significant processing bottlenecks.
Default
No tracing
Syntax
TRACE | TRACE2
[, DDL[INCLUDE] | DDLONLY]
[, [FILE] file_name]
[, THREADS (threadID[, threadID][, ...][,
thread_range[, thread_range][, ...])]
TRACE
Provides step-by-step processing
information.
TRACE2
Identifies the code segments on which
Extract or Replicat is spending the most time.
DDL[INCLUDE] | DDLONLY
(Replicat only) Enables DDL tracing and
specifies how DDL tracing is included in the trace report.
DDL[INCLUDE]
Traces DDL and also traces transactional
data processing. This is the default. Either DDL or DDLINCLUDE is valid.
DDLONLY
Traces DDL but does not trace transactional
data.
[FILE] file_name
The relative or fully qualified name of a
file to which Oracle GoldenGate logs the trace information. The FILE keyword is
optional, but must be used if other parameter options will follow the file
name, for example:
TRACE FILE file_name DDLINCLUDE
If no other options will follow the file
name, the FILE keyword can be omitted, for example:
TRACE DDLINCLUDE file_name
THREADS (threadID[, threadID][, ...][,
thread_range[, thread_range][, ...])
Enables tracing only for the specified
thread or threads of a coordinated Replicat. Tracing is only performed for
threads that are active at runtime.
threadID[, threadID][, ...]
Specifies a thread ID or a comma-delimited
list of threads in the format of threadID, threadID, threadID.
[, thread_range[, thread_range][, ...]
Specifies a range of threads in the form of
threadIDlow-threadIDhigh or a comma-delimted list of ranges in the format of
threadIDlow-threadIDhigh, threadIDlow-threadIDhigh.
A combination of these formats is
permitted, such as threadID, threadID, threadIDlow-threadIDhigh.
If the Replicat is in coordinated mode and
TRACE is used with a THREADS list or range, a trace file is created for each
currently active thread. Each file name is appended with its associated thread
ID. This method of identifying trace files by thread ID does not apply when
SEND REPLICAT is issued by groupname with threadID (as in SEND REPLICAT fin003
TRACE...) or when only one thread is specified with THREADS.
Contact Oracle Support for assistance if
the trace reveals significant processing bottlenecks.
Examples
Example 1
The following traces to a file named
trace.trc. If this is a coordinated Replicat group, the tracing applies to all
active threads.
TRACE /home/ggs/dirrpt/trace.trc
Example 2
The following enables tracing for only
thread 1. In this case, because only one thread is being traced, the trace file
will not have a threadID extension. The file name is trace.trc.
TRACE THREADS(1) FILE ./dirrpt/trace.trc
Example 3
The following enables tracing for threads
1,2, and 3. Assuming all threads are active, the tracing produces files
trace001, trace002, and trace003.
TRACE THREADS(1-3) FILE ./dirrpt/trace.trc
#################################################################
Stopping process forcefully
#################################################################
stop extract ehahk !
stop extract ehahk force
#################################################################
Bounded Recovery
#################################################################
Bounded Recovery is a component of Oracle GoldenGate’s Extract process checkpointing facility. It guarantees an efficient recovery after Extract stops for any reason, planned or unplanned, no matter how many open (uncommitted) transactions there were at the time that Extract stopped, nor how old they were. Bounded Recovery sets an upper boundary for the maximum amount of time that it would take for Extract to recover to the point where it stopped and then resume normal processing.
Bounded Recovery is new feature in OGG 11.1, this is how it works:
A transaction qualifies as long-running if it has been open longer than one Bounded Recovery interval, which is specified with the BRINTERVAL option of the BR parameter.
For example, if the Bounded Recovery interval is four hours, a long-running open transaction is any transaction that started more than four hours ago.
At each Bounded Recovery interval, Extract makes a Bounded Recovery checkpoint, which persists the current state and data of Extract to disk, including the state and data (if any) of long-running transactions. If Extract stops after a Bounded Recovery checkpoint, it will recover from a position within the previous Bounded Recovery interval or at the last Bounded Recovery checkpoint, instead of processing from the log position where the oldest open long-running transaction first appeared, which could be several trail files ago.
The BR checkpoint information is shown in the SHOWCH output starting with OGG v11.1.1.1
Bounded Recovery is enabled by default for Extract processes and has a 4 hour BR interval. To adjust the BR interval to say 24 hours, use the following syntax in your Extract parameter file:
BR BRINTERVAL 24, BRDIR BR
The default location for BR checkpoint files is the GoldenGate home directory. This can be altered by including a full path:
BR BRINTERVAL 24, BRDIR /ggsdata/brcheckpoint
We only need to cater is there is no lag and checkpoint is done before stopping .
Move trail files to Dr site . get more information from below Oracle doc
1323670.1 Best Practice - Oracle GoldenGate and Oracle Data Guard - Switchover/Fail-over Operations
=> Defining reperror
REPERROR (DEFAULT, EXCEPTION)
REPERROR (DEFAULT2, ABEND)
REPERROR (-1, EXCEPTION
=> Creating exception table
Doc ID 1382092.1)
GoldenGate does not provide a standard exceptions handler. By default, a Replicat process will abend should any operational failure occur, and will rollback the transaction to the last known checkpoint. This may not be ideal in a production environment.
The HANDLECOLLISIONS and NOHANDLECOLLISIONS parameters can be used to control whether or not Replicat tries to resolve duplicate-record and missing-record errors, but should these errors be ignored?
To determine what error has occurred, by which Replicat, caused by what data, create an Exceptions handler that will trap and log the specified Oracle error(s), but allow the Replicat to continue to process data.
Here is an example
drop table ggddlusr.exceptions
/
create table ggddlusr.exceptions
(rep_name varchar2(8),
table_name varchar2(61),
errno number,
dberrmsg varchar2(4000),
optype varchar2(20),
errtype varchar2(20),
logrba number,
logposition number,
committimestamp timestamp)
/
replicat r1
SETENV (ORACLE_HOME="/oracle/software/rdbms/11.2.0.2")
SETENV (ORACLE_SID="test")
SETENV (NLS_LANG = "AMERICAN_AMERICA.UTF8")
userid ggddlusr,password test
--Start of the Macro
MACRO #exception_handler
BEGIN
, TARGET ggddlusr.exceptions
, colmap ( rep_name = "R1"
, table_name = @GETENV ("GGHEADER", "TABLENAME")
, errno = @GETENV ("LASTERR", "DBERRNUM")
, dberrmsg = @GETENV ( "LASTERR", "DBERRMSG")
, optype = @GETENV ( "LASTERR", "OPTYPE")
, errtype = @GETENV ( "LASTERR", "ERRTYPE")
, logrba = @GETENV ( "GGHEADER", "LOGRBA")
, logposition = @GETENV ( "GGHEADER", "LOGPOSITION")
, committimestamp = @GETENV ( "GGHEADER", "COMMITTIMESTAMP") )
, INSERTALLRECORDS
, EXCEPTIONSONLY;
END;
--End of the Macro
Reportcount every 30 Minutes, Rate
Report at 01:00
ReportRollover at 01:15
discardfile /oracle/software/goldengate/11.1.1.1/dirrpt/r1.dsc, megabytes 50, append
DDL INCLUDE MAPPED &
EXCLUDE OBJNAME "GGDDLUSR.EXCEPTIONS"
AssumeTargetDefs
REPERROR (DEFAULT, EXCEPTION)
REPERROR (DEFAULT2, ABEND)
Map pubs.*, target pubs.*;
MAP pubs.* #exception_handler()
#################################################################
Stopping process forcefully
#################################################################
stop extract ehahk !
stop extract ehahk force
#################################################################
Bounded Recovery
#################################################################
Bounded Recovery is a component of Oracle GoldenGate’s Extract process checkpointing facility. It guarantees an efficient recovery after Extract stops for any reason, planned or unplanned, no matter how many open (uncommitted) transactions there were at the time that Extract stopped, nor how old they were. Bounded Recovery sets an upper boundary for the maximum amount of time that it would take for Extract to recover to the point where it stopped and then resume normal processing.
- BR Begin Recovery Checkpoint:
This is similar to Standard recovery checkpoint.
This is the first file that would be required for recovery.
Whole or parts of transactions are restored by BR from BR files.
Manually deleting the BR files is not recommended.
- BR End Recovery Checkpoint:
The end of bounded recovery is where the extract will begin to process records normally from redo or archive logs.
This is similar to standard current checkpoint
Bounded Recovery is new feature in OGG 11.1, this is how it works:
A transaction qualifies as long-running if it has been open longer than one Bounded Recovery interval, which is specified with the BRINTERVAL option of the BR parameter.
For example, if the Bounded Recovery interval is four hours, a long-running open transaction is any transaction that started more than four hours ago.
At each Bounded Recovery interval, Extract makes a Bounded Recovery checkpoint, which persists the current state and data of Extract to disk, including the state and data (if any) of long-running transactions. If Extract stops after a Bounded Recovery checkpoint, it will recover from a position within the previous Bounded Recovery interval or at the last Bounded Recovery checkpoint, instead of processing from the log position where the oldest open long-running transaction first appeared, which could be several trail files ago.
The BR checkpoint information is shown in the SHOWCH output starting with OGG v11.1.1.1
Bounded Recovery is enabled by default for Extract processes and has a 4 hour BR interval. To adjust the BR interval to say 24 hours, use the following syntax in your Extract parameter file:
BR BRINTERVAL 24, BRDIR BR
The default location for BR checkpoint files is the GoldenGate home directory. This can be altered by including a full path:
BR BRINTERVAL 24, BRDIR /ggsdata/brcheckpoint
Manually create checkpoint : recommended before stopping extract
send extsha1, br brcheckpoint immediate
#################################################################
Golden Gate in case of Dataguard switchover / Failover
#################################################################We only need to cater is there is no lag and checkpoint is done before stopping .
Move trail files to Dr site . get more information from below Oracle doc
1322547.1 Best Practice - Oracle GoldenGate and Oracle Data Guard - Switchover/Fail-over Operations for GoldenGate
Controlled DR Tests Using Dataguard with Goldengate in the Mix (Doc ID 1672938.1)
#################################################################
Golden Gate in case of Falshback
#################################################################
If database is flashback , we need to reset Gg capture process accordingly . Refer Doc ID 1626736.1 for more information
SQL> select min (OLDEST_FLASHBACK_TIME) from v$flashback_database_log;
MIN(OLDEST_FLASHB
-----------------
26-FEB-2017 07:25
ggsci
stop replicat CSCURPCH;
SHUTDOWN IMMEDIATE;
STARTUP MOUNT;
flashback database to timestamp TO_TIMESTAMP( ‘2017-02-26 08:45:00′,’YYYY-MM-DD HH24:MI:SS’);
ALTER DATABASE OPEN READ ONLY;
SHUTDOWN IMMEDIATE
STARTUP MOUNT
ALTER DATABASE OPEN RESETLOGS;
ggsci
alter replicat CSCURPCH, begin 2017-02-26 08:45:00
start replicat CSCURPCH
Ideally we cannot add tables without having primary key or primary key is disabled . Also if some wanted columns are not part of primary key . In this scenario we can use KEYCOLS
If database is flashback , we need to reset Gg capture process accordingly . Refer Doc ID 1626736.1 for more information
SQL> select min (OLDEST_FLASHBACK_TIME) from v$flashback_database_log;
MIN(OLDEST_FLASHB
-----------------
26-FEB-2017 07:25
ggsci
stop replicat CSCURPCH;
SHUTDOWN IMMEDIATE;
STARTUP MOUNT;
flashback database to timestamp TO_TIMESTAMP( ‘2017-02-26 08:45:00′,’YYYY-MM-DD HH24:MI:SS’);
ALTER DATABASE OPEN READ ONLY;
SHUTDOWN IMMEDIATE
STARTUP MOUNT
ALTER DATABASE OPEN RESETLOGS;
ggsci
alter replicat CSCURPCH, begin 2017-02-26 08:45:00
start replicat CSCURPCH
#################################################################
Tables without primary Key --> KEYCOLS
#################################################################
Ideally we cannot add tables without having primary key or primary key is disabled . Also if some wanted columns are not part of primary key . In this scenario we can use KEYCOLS
TABLE OWNER.table_name ,KEYCOLS (column1,column2);
Refer Doc ID 1271578.1 for more detail
#################################################################
Exception and Error Handling
#################################################################
=> Defining reperror
REPERROR (DEFAULT, EXCEPTION)
REPERROR (DEFAULT2, ABEND)
REPERROR (-1, EXCEPTION
=> Creating exception table
Doc ID 1382092.1)
GoldenGate does not provide a standard exceptions handler. By default, a Replicat process will abend should any operational failure occur, and will rollback the transaction to the last known checkpoint. This may not be ideal in a production environment.
The HANDLECOLLISIONS and NOHANDLECOLLISIONS parameters can be used to control whether or not Replicat tries to resolve duplicate-record and missing-record errors, but should these errors be ignored?
To determine what error has occurred, by which Replicat, caused by what data, create an Exceptions handler that will trap and log the specified Oracle error(s), but allow the Replicat to continue to process data.
Here is an example
drop table ggddlusr.exceptions
/
create table ggddlusr.exceptions
(rep_name varchar2(8),
table_name varchar2(61),
errno number,
dberrmsg varchar2(4000),
optype varchar2(20),
errtype varchar2(20),
logrba number,
logposition number,
committimestamp timestamp)
/
replicat r1
SETENV (ORACLE_HOME="/oracle/software/rdbms/11.2.0.2")
SETENV (ORACLE_SID="test")
SETENV (NLS_LANG = "AMERICAN_AMERICA.UTF8")
userid ggddlusr,password test
--Start of the Macro
MACRO #exception_handler
BEGIN
, TARGET ggddlusr.exceptions
, colmap ( rep_name = "R1"
, table_name = @GETENV ("GGHEADER", "TABLENAME")
, errno = @GETENV ("LASTERR", "DBERRNUM")
, dberrmsg = @GETENV ( "LASTERR", "DBERRMSG")
, optype = @GETENV ( "LASTERR", "OPTYPE")
, errtype = @GETENV ( "LASTERR", "ERRTYPE")
, logrba = @GETENV ( "GGHEADER", "LOGRBA")
, logposition = @GETENV ( "GGHEADER", "LOGPOSITION")
, committimestamp = @GETENV ( "GGHEADER", "COMMITTIMESTAMP") )
, INSERTALLRECORDS
, EXCEPTIONSONLY;
END;
--End of the Macro
Reportcount every 30 Minutes, Rate
Report at 01:00
ReportRollover at 01:15
discardfile /oracle/software/goldengate/11.1.1.1/dirrpt/r1.dsc, megabytes 50, append
DDL INCLUDE MAPPED &
EXCLUDE OBJNAME "GGDDLUSR.EXCEPTIONS"
AssumeTargetDefs
REPERROR (DEFAULT, EXCEPTION)
REPERROR (DEFAULT2, ABEND)
Map pubs.*, target pubs.*;
MAP pubs.* #exception_handler()
#################################################################
Golden Gate in Rac Environment
#################################################################
Apart from normal GG installation for Rac gg installation 2 main task is to decide cluster filesystem and to add gg in Cluster as application resource
Also in Rac configuration we need to move dirpcs to shared drive / distributed file system environment like dirprm , dirdat , dirchk , dirtmp , dirrpt . Goldengate instance will be active on one node and in case of a node reboot or node eviction or node is down for a scheduled maintenance, the Goldengate process should automatically fail over to the other surviving nodes.
>> Extract can only run against one instance
>> If instance fails,
Manager must be stopped on failed node:
Manager and extract must be started on a surviving node
>> Failover can be configured in Oracle Grid Infrastructure
Update the below vip in the /etc/hosts file on both the nodes , ( the vip should be on the same
subnet of the public ip).
--> add Vip
Oracle Clusterware runs resource-specific commands through an entity called an agent.
The agent script must be able to accept 5 parameter values: start, stop, check, clean and abort (optional).
Now we will create an script to and will also place the script in the shared location, here we have placed the script
under the gg home which will be accessed on both the nodes. (This is the sample script provided by oracle we can also have a customized script as per our requirement).
--> add gg to cluster
-->Testing
References :
Also in Rac configuration we need to move dirpcs to shared drive / distributed file system environment like dirprm , dirdat , dirchk , dirtmp , dirrpt . Goldengate instance will be active on one node and in case of a node reboot or node eviction or node is down for a scheduled maintenance, the Goldengate process should automatically fail over to the other surviving nodes.
>> Extract can only run against one instance
>> If instance fails,
Manager must be stopped on failed node:
Manager and extract must be started on a surviving node
>> Failover can be configured in Oracle Grid Infrastructure
Not covering ACFS creation in this blog
Adding GG in Cluster as application resource :
For adding GG resource to cluster we need to first create application vip resource .
Update the below vip in the /etc/hosts file on both the nodes , ( the vip should be on the same
subnet of the public ip).
--> add Vip
cd /optware/grid/11.2.0.4/bin
./appvipcfg create -network=1 -ip=<x.x.x.x> -vipname=pggs-vip -user=root
./crsctl setperm resource pggs-vip -u user:oracle:r-x
cd /optware/grid/11.2.0.4/bin
./crsctl status resource pggs-vip -t
./crsctl start resource pggs-vip -c hostname
./crsctl status resource pggs-vip -t
--> deploy agent script
The agent script must be able to accept 5 parameter values: start, stop, check, clean and abort (optional).
Now we will create an script to and will also place the script in the shared location, here we have placed the script
under the gg home which will be accessed on both the nodes. (This is the sample script provided by oracle we can also have a customized script as per our requirement).
GG_HOME
mkdir crs_gg_script
cd crs_gg_script
vi gg_action.scr
chmod 750 gg_action.scr
cd /optware/grid/11.2.0.4/bin/
ls -ltr /ggdata/csgg/crs_gg_script/gg_action.scr
#!/bin/sh
#goldengate_action.scr
. ~oracle/.bash_profile
[ -z "$1" ]&& echo "ERROR!! Usage $0 "&& exit 99
GGS_HOME=/golden_gate
#specify delay after start before checking for successful start
start_delay_secs=5
#Include the Oracle GoldenGate home in the library path to start GGSCI
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${GGS_HOME}
#set the oracle home to the database to ensure Oracle GoldenGate will get
#the right environment settings to be able to connect to the database
export ORACLE_HOME=/u01/app/oracle/product/11.2/db
export CRS_HOME=/grid/11.2
#Set NLS_LANG otherwise it will default to US7ASCII
export NLS_LANG=AMERICAN_AMERICA.AL32UTF8
logfile=/tmp/crs_gg_start.log
###########################
function log
###########################
{
DATETIME=`date +%d/%m/%y-%H:%M:%S`
echo $DATETIME "goldengate_action.scr>>" $1
echo $DATETIME "goldengate_action.scr>>" $1 >> $logfile
}
#check_process validates that a manager process is running at the PID
#that Oracle GoldenGate specifies.
check_process () {
dt=`date +%d/%m/%y-%H:%M:%S`
if ( [ -f "${GGS_HOME}/dirpcs/MGR.pcm" ] )
then
pid=`cut -f8 "${GGS_HOME}/dirpcs/MGR.pcm"`
if [ ${pid} = `ps -e |grep ${pid} |grep mgr |awk '{ print $1 }'` ]
then
#manager process is running on the PID . exit success
echo $dt "manager process is running on the PID . exit success">> /tmp/check.out
exit 0
else
#manager process is not running on the PID
echo $dt "manager process is not running on the PID" >> /tmp/check.out
exit 1
fi
else
#manager is not running because there is no PID file
echo $ dt"manager is not running because there is no PID file" >> /tmp/check.out
exit 1
fi
}
#call_ggsci is a generic routine that executes a ggsci command
call_ggsci () {
log "entering call_ggsci"
ggsci_command=$1
#log "about to execute $ggsci_command"
log "id= $USER"
cd ${GGS_HOME}
ggsci_output=`${GGS_HOME}/ggsci << EOF
${ggsci_command}
exit
EOF`
log "got output of : $ggsci_output"
}
case $1 in
'start')
#Updated by Sourav B (02/10/2011)
# During failover if the “mgr.pcm” file is not deleted at the node crash
# then Oracle clusterware won’t start the manager on the new node assuming the
# manager process is still running on the failed node. To get around this issue
# we will delete the “mgr.prm” file before starting up the manager on the new
# node. We will also delete the other process files with pc* extension and to
# avoid any file locking issue we will first backup the checkpoint files and then
# delete them from the dirchk directory.After that we will restore the checkpoint
# files from backup to the original location (dirchk directory).
log "removing *.pc* files from dirpcs directory..."
cd $GGS_HOME/dirpcs
rm -f *.pc*
log "creating tmp directory to backup checkpoint file...."
cd $GGS_HOME/dirchk
mkdir tmp
log "backing up checkpoint files..."
cp *.cp* $GGS_HOME/dirchk/tmp
log "Deleting checkpoint files under dirchk......"
rm -f *.cp*
log "Restore checkpoint files from backup to dirchk directory...."
cp $GGS_HOME/dirchk/tmp/*.cp* $GGS_HOME/dirchk
log "Deleting tmp directory...."
rm -rf tmp
log "starting manager"
call_ggsci 'start manager'
#there is a small delay between issuing the start manager command
#and the process being spawned on the OS . wait before checking
log "sleeping for start_delay_secs"
sleep ${start_delay_secs}
#check whether manager is running and exit accordingly
check_process
;;
'stop')
#attempt a clean stop for all non-manager processes
call_ggsci 'stop er *'
#ensure everything is stopped
call_ggsci 'stop er *!'
#stop manager without (y/n) confirmation
call_ggsci 'stop manager!'
#exit success
exit 0
;;
'check')
check_process
exit 0
;;
'clean')
#attempt a clean stop for all non-manager processes
call_ggsci 'stop er *'
#ensure everything is stopped
call_ggsci 'stop er *!'
#in case there are lingering processes
call_ggsci 'kill er *'
#stop manager without (y/n) confirmation
call_ggsci 'stop manager!'
#exit success
exit 0
;;
'abort')
#ensure everything is stopped
call_ggsci 'stop er *!'
#in case there are lingering processes
call_ggsci 'kill er *'
#stop manager without (y/n) confirmation
call_ggsci 'stop manager!'
#exit success
exit 0
;;
esac
#goldengate_action.scr
. ~oracle/.bash_profile
[ -z "$1" ]&& echo "ERROR!! Usage $0 "&& exit 99
GGS_HOME=/golden_gate
#specify delay after start before checking for successful start
start_delay_secs=5
#Include the Oracle GoldenGate home in the library path to start GGSCI
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${GGS_HOME}
#set the oracle home to the database to ensure Oracle GoldenGate will get
#the right environment settings to be able to connect to the database
export ORACLE_HOME=/u01/app/oracle/product/11.2/db
export CRS_HOME=/grid/11.2
#Set NLS_LANG otherwise it will default to US7ASCII
export NLS_LANG=AMERICAN_AMERICA.AL32UTF8
logfile=/tmp/crs_gg_start.log
###########################
function log
###########################
{
DATETIME=`date +%d/%m/%y-%H:%M:%S`
echo $DATETIME "goldengate_action.scr>>" $1
echo $DATETIME "goldengate_action.scr>>" $1 >> $logfile
}
#check_process validates that a manager process is running at the PID
#that Oracle GoldenGate specifies.
check_process () {
dt=`date +%d/%m/%y-%H:%M:%S`
if ( [ -f "${GGS_HOME}/dirpcs/MGR.pcm" ] )
then
pid=`cut -f8 "${GGS_HOME}/dirpcs/MGR.pcm"`
if [ ${pid} = `ps -e |grep ${pid} |grep mgr |awk '{ print $1 }'` ]
then
#manager process is running on the PID . exit success
echo $dt "manager process is running on the PID . exit success">> /tmp/check.out
exit 0
else
#manager process is not running on the PID
echo $dt "manager process is not running on the PID" >> /tmp/check.out
exit 1
fi
else
#manager is not running because there is no PID file
echo $ dt"manager is not running because there is no PID file" >> /tmp/check.out
exit 1
fi
}
#call_ggsci is a generic routine that executes a ggsci command
call_ggsci () {
log "entering call_ggsci"
ggsci_command=$1
#log "about to execute $ggsci_command"
log "id= $USER"
cd ${GGS_HOME}
ggsci_output=`${GGS_HOME}/ggsci << EOF
${ggsci_command}
exit
EOF`
log "got output of : $ggsci_output"
}
case $1 in
'start')
#Updated by Sourav B (02/10/2011)
# During failover if the “mgr.pcm” file is not deleted at the node crash
# then Oracle clusterware won’t start the manager on the new node assuming the
# manager process is still running on the failed node. To get around this issue
# we will delete the “mgr.prm” file before starting up the manager on the new
# node. We will also delete the other process files with pc* extension and to
# avoid any file locking issue we will first backup the checkpoint files and then
# delete them from the dirchk directory.After that we will restore the checkpoint
# files from backup to the original location (dirchk directory).
log "removing *.pc* files from dirpcs directory..."
cd $GGS_HOME/dirpcs
rm -f *.pc*
log "creating tmp directory to backup checkpoint file...."
cd $GGS_HOME/dirchk
mkdir tmp
log "backing up checkpoint files..."
cp *.cp* $GGS_HOME/dirchk/tmp
log "Deleting checkpoint files under dirchk......"
rm -f *.cp*
log "Restore checkpoint files from backup to dirchk directory...."
cp $GGS_HOME/dirchk/tmp/*.cp* $GGS_HOME/dirchk
log "Deleting tmp directory...."
rm -rf tmp
log "starting manager"
call_ggsci 'start manager'
#there is a small delay between issuing the start manager command
#and the process being spawned on the OS . wait before checking
log "sleeping for start_delay_secs"
sleep ${start_delay_secs}
#check whether manager is running and exit accordingly
check_process
;;
'stop')
#attempt a clean stop for all non-manager processes
call_ggsci 'stop er *'
#ensure everything is stopped
call_ggsci 'stop er *!'
#stop manager without (y/n) confirmation
call_ggsci 'stop manager!'
#exit success
exit 0
;;
'check')
check_process
exit 0
;;
'clean')
#attempt a clean stop for all non-manager processes
call_ggsci 'stop er *'
#ensure everything is stopped
call_ggsci 'stop er *!'
#in case there are lingering processes
call_ggsci 'kill er *'
#stop manager without (y/n) confirmation
call_ggsci 'stop manager!'
#exit success
exit 0
;;
'abort')
#ensure everything is stopped
call_ggsci 'stop er *!'
#in case there are lingering processes
call_ggsci 'kill er *'
#stop manager without (y/n) confirmation
call_ggsci 'stop manager!'
#exit success
exit 0
;;
esac
--> add gg to cluster
./crsctl add resource ggateapp \
-type cluster_resource \
-attr "ACTION_SCRIPT= /ggdata/csgg/crs_gg_script/gg_action.scr, \
CHECK_INTERVAL=30, \
START_DEPENDENCIES='hard(pggs-vip,ora.asm) pullup(pggs-vip)', \
STOP_DEPENDENCIES='hard(pggs-vip)'"
./crsctl status resource ggateapp -t
./crsctl start resource ggateapp -c hostname
./crsctl status resource ggateapp -t
-->Testing
cd /optware/grid/11.2.0.4/bin
./crsctl relocate resource ggateapp -f
./crsctl status resource ggateapp -t
./crsctl status resource pggs-vip -t
cd /optware/grid/11.2.0.4/bin/
./crs_relocate -f ggateapp -f
./crsctl status resource ggateapp -t
./crsctl status resource pggs-vip -t
https://all-database-soultions.blogspot.com/2022/11/install-and-configure-gg-monitoring.html
No comments:
Post a Comment