Monday, November 18, 2013

Let's get more into SQL Plans



                                        Lets get more into Sql Plans 





Ÿ Re-analyze the schema statistics (done automatically, starting in 10g) – auto stats
Ÿ  object not analyzed
Ÿ cursor got invalidated
Ÿ  bind sensitive
Ÿ database parameter change — e.g. changing an optimizer parameter (optimizer_mode, optimizer_index_cost_adj)
Ÿ shared pool flush ::: new plan generation
Index got invalidated

Enabling dynamic sampling
Enabling parallelism






>>>>>>>>>>>>>>>>>>>>>>>>>>>> session tracing

•If you only know the package, enable trace 10046 before execute the package
1.Example:
2.SQLPLUS>alter session set events '10046 trace name context forever, level 12';
3.SQLPLUS>exec PKG_BL_RCR_REV_FACT.SP_BL_RCR_REV
4.After done, go the “user_dump_dest” and get the trace file.
5.OS> cd /u01/app/oracle/diag/rdbms/dwdb/DWDB1/trace
6.OS> tkprof tracefile_name outputfile_name explain=hwdw/password waits=y



>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Using hints


/*+ gather_plan_statistics */

SELECT /*+ gather_plan_statistics */ COUNT(*)   FROM   x;

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>  cursor


Set long 20000000
Set pagesize 0
Set linesize 200
set linesize 121

select sql_id from v$session where sid=112;
SELECT * FROM TABLE(dbms_xplan.display_cursor('sql_id',1));
select * from table(dbms_xplan.display_cursor);  -- own session where query was run
SELECT * FROM TABLE(dbms_xplan.display_cursor('gpv3kb4n2f2q1'));
SELECT * FROM TABLE(dbms_xplan.display_cursor(NULL, NULL, 'last allstats'));


SQL> set linesize 150
SQL> set pagesize 2000
SQL> select * from TABLE(dbms_xplan.display_cursor('&SQL_ID', &CHILD_number,'ALL'));

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>  v$sql_plan


-- Render the run-time execution plan from V$SQL_PLAN_MONITOR (11g Real-Time
-- SQL Monitoring) as one indented text line per plan step, in the form:
--   (line parent)|  OPERATION OPTIONS....(OWNER.OBJECT) Cost:n (b bytes, r rows) PStart:p PStop:p t(s)
-- The execution is pinned by all three of: sql_id + sql_exec_id +
-- sql_exec_start (several executions of the same sql_id may be monitored).
-- NOTE(review): Real-Time SQL Monitoring requires the Tuning Pack license.
col PLAN FOR a150
 SELECT
     RPAD('(' || p.plan_line_ID || ' ' || NVL(p.plan_parent_id,'0') || ')',8) || '|' ||
     RPAD(LPAD (' ', 2*p.plan_DEPTH) || p.plan_operation || ' ' || p.plan_options,60,'.') ||
     NVL2(p.plan_object_owner||p.plan_object_name, '(' || p.plan_object_owner|| '.' || p.plan_object_name || ') ', '') ||
     NVL2(p.plan_COST,'Cost:' || p.plan_COST,'') || ' ' ||
     NVL2(p.plan_bytes||p.plan_CARDINALITY,'(' || p.plan_bytes || ' bytes, ' || p.plan_CARDINALITY || ' rows)','') || ' ' ||
     NVL2(p.plan_partition_start || p.plan_partition_stop,' PStart:' ||  p.plan_partition_start || ' PStop:' || p.plan_partition_stop,'') ||
     NVL2(p.plan_time, p.plan_time || '(s)','') AS PLAN
     FROM v$sql_plan_monitor p
     WHERE sql_id = '81c1xvnmsyr64'
     AND sql_exec_id = 16777270
     AND sql_exec_start=TO_DATE('11-jan-2012 05:59:27','dd-mon-yyyy hh24:mi:ss')
     ORDER BY p.plan_line_id, p.plan_parent_id;



>>>>>>>>>>>>>>>>>>>>>> explain plan for



EXPLAIN PLAN   
1) Explain plan displays execution plan(sequence of operation) chosen by oracle optimizer
To run statement of select , insert , update ,delete.


2)
  CREATING EXPLAIN TABLE  - UTLXPLAN.SQL 
  HIDE PARALLEL QUERY INFORMATION – UTLXPLS.SQL
     (TO QUERY THE PLAN_TABLE)
  SHOW PARALLEL QUERY INFORMATION – UTLXPLP.SQL
      (TO QUERY THE PLAN_TABLE)

3)
 EXPLAIN Plan ****INTO PLAN_TABLE  SET STATEMENT_ID = '    ' ******** FOR     SELECT ……………


NOTE : THE EXECUTION PLAN FOR EVERY SQL STATEMEN IS GENERATED AND STORED BY DEFAULT IN   V$SQL_PLAN. BUT THE EXECUTION   PLAN IS STORED ONLY TILL THE STATEMENT IS IN LIBRARY CACHE.


select * from table(dbms_xplan.Display);
select plan_table_output from table(dbms_xplan.display()) ;

-- Display a PLAN_TABLE plan whose statement_id was built from the cursor's
-- address and child number; the inner query locates the cursor in V$SQL by
-- its exact SQL text and builds the 'ADDR_childno' statement_id string.
-- NOTE(review): assumes the statement was previously EXPLAINed into
-- PLAN_TABLE with that statement_id -- confirm before relying on the output.
-- The sql_text match is exact (no wildcard), so whitespace must match too.
select plan_table_output
   from TABLE( dbms_xplan.display
               ( 'PLAN_TABLE',
                (select rawtohex(address)||'_'||child_number x
                 from v$sql
 where sql_text='select * from t t1 where object_id > 32000' ),
                 'serial' ) );


>>>>>>>>>>>>>>>>>>>>>>>>>>>> awr ( 11g.. will list all sql plans . historical )

Ÿ check from dba_hist_sqltext if sqlid  has been AWR. If its not loaded create a manual snapshot


Ø  exec dbms_workload_repository.modify_snapshot_settings(topnsql=>'MAXIMUM');
Ø  exec dbms_workload_repository.create_snapshot();
Ø  exec dbms_workload_repository.modify_snapshot_settings(topnsql=>'DEFAULT');
Ø  select sql_id,sql_text from dba_hist_sqltext where sql_id='232xssd22232';


 select * from TABLE(DBMS_XPLAN.DISPLAY_AWR('&sqlid'));
 select * from TABLE(DBMS_XPLAN.DISPLAY_AWR('&sqlid', &plan_hash_value));  -- second argument is a number, not a string


>>>>>>>>>>>>>>>>>>>>>>>>>>>> set auto trace


SQL*PLUS AUTOTRACE / Explain plan
1)         RUN SQLPLUS / ADMIN / PLUSTRCE.SQL  -- creates the PLUSTRACE role
2)         GRANT PLUSTRACE TO SCOTT
3)         SET AUTOTRACE         ON / OFF / TRACEONLY
                              ON EXPLAIN / ON STATISTICS

Set autotrace traceonly explain statistics

Note    :  TO SET AUTO TRACE WE MUST HAVE PLAN_TABLE AND PLUSTRACE ROLE.







1)select SQL_TEXT, hash_value from V$sqltext where ADDRESS='0700000B0BDC7B60' order by piece;


-- SQL*Plus script: show the cached plan from V$SQL_PLAN for a cursor
-- identified by its address (&addr), including the columns that matter for
-- Parallel Query / distributed statements (OBJECT_NODE = table queue,
-- OTHER_TAG = parallel operation type, DISTRIBUTION = row distribution,
-- OTHER = the SQL shipped to PQ slaves or the remote site).
pause this query shows columns useful in Parallel Query or distributed query
col id for 99
col Par for 99
col pos for 99
col "Table Queue" for a10
col Operation format a24
col Options   format a12
col Object    format a12
col "Row Source" for a10
col "Inter Step SQL" for a16
col "Parallel Oper Type" for a20
col "Row Dist" for a11
set long 500
pause Position has total cost in row when ID=0 if CBO is used
select id "Id",
       parent_id "Par",
       position "Pos",
       lpad(' ', 2*LEVEL) || OPERATION ||
        decode(ID, 0, ' Cost='||POSITION) "Operation",
       OPTIONS "Options",
       OBJECT_NAME "Object",
       OBJECT_NODE "Table Queue",
       other_tag "Parallel Oper Type",
       distribution "Row Dist",
       other "Inter Step SQL"
from v$sql_plan
       where address = '&addr';




-- Print an ASCII-art execution plan straight from V$SQL_PLAN for a cursor
-- identified by its hash value (&hash_value). The three literal '-----'
-- SELECTs draw the table border; the middle branch formats each plan row as
--   | Operation | PHV/Object Name | Rows | Bytes | Cost |
-- The nested DECODE(SIGN(...)) ladders scale cardinality/bytes/cost into
-- K / M / G units. SYS_LE_* object names (intermediate load-as-select
-- objects) are blanked out. The /*+ RULE */ hint keeps the plan fetch
-- itself trivial.
2)select '--------------------------------------------------------------------------------' from dual
union all
select '| Operation                      | PHV/Object Name     |  Rows | Bytes|   Cost |'  as "Optimizer Plan:"
from dual
union all
select '--------------------------------------------------------------------------------' from dual
union all
select /*+ RULE */ *
  from (select
       rpad('|'||substr(lpad(' ',1*(depth-1))||operation||
            decode(options, null,'',' '||options), 1, 32), 33, ' ')||'|'||
       rpad(decode(id, 0, '----- '||to_char(hash_value)||' -----'
                     , substr(decode(substr(object_name, 1, 7), 'SYS_LE_', null, object_name)
                       ||' ',1, 20)), 21, ' ')||'|'||
       lpad(decode(cardinality,null,'  ',
                decode(sign(cardinality-1000), -1, cardinality||' ',
                decode(sign(cardinality-1000000), -1, trunc(cardinality/1000)||'K',
                decode(sign(cardinality-1000000000), -1, trunc(cardinality/1000000)||'M',
                       trunc(cardinality/1000000000)||'G')))), 7, ' ') || '|' ||
       lpad(decode(bytes,null,' ',
                decode(sign(bytes-1024), -1, bytes||' ',
                decode(sign(bytes-1048576), -1, trunc(bytes/1024)||'K',
                decode(sign(bytes-1073741824), -1, trunc(bytes/1048576)||'M',
                       trunc(bytes/1073741824)||'G')))), 6, ' ') || '|' ||
       lpad(decode(cost,null,' ',
                decode(sign(cost-10000000), -1, cost||' ',
                decode(sign(cost-1000000000), -1, trunc(cost/1000000)||'M',
                       trunc(cost/1000000000)||'G'))), 8, ' ') || '|' as "Explain plan"
          from v$sql_plan
         where hash_value= nvl('&hash_value','0')
         order by id
)
union all
select '--------------------------------------------------------------------------------' from dual;
set verify on
set heading on
undefine hash_value
set heading on;




è Historical Sql plan  :

-- Historical SQL performance from AWR: per-snapshot elapsed time for a given
-- SQL_ID and plan hash value over the last 60 days, worst snapshots first.
SELECT stat.sql_id,
       txt.sql_text,
       stat.plan_hash_value,
       stat.parsing_schema_name,
       stat.elapsed_time_delta,
       stat.snap_id,
       ss.end_interval_time
FROM   dba_hist_sqlstat stat
       INNER JOIN dba_hist_sqltext txt
               ON  stat.sql_id = txt.sql_id
               AND stat.dbid   = txt.dbid
       INNER JOIN dba_hist_snapshot ss
               ON  ss.dbid            = stat.dbid
               AND ss.instance_number = stat.instance_number
               AND ss.snap_id         = stat.snap_id
WHERE  ss.begin_interval_time >= sysdate - 60
AND    UPPER(stat.sql_id) = '&SQL_ID'
AND    stat.plan_hash_value = &PLAN_HASH
ORDER  BY stat.elapsed_time_delta DESC;


-- AWR plan history for one SQL_ID over the last 8 days: which
-- plan_hash_value / optimizer cost / optimizer mode each snapshot recorded.
SELECT snap.begin_interval_time,
       snap.end_interval_time,
       stat.snap_id,
       stat.dbid,
       stat.sql_id,
       stat.plan_hash_value,
       stat.optimizer_cost,
       stat.optimizer_mode
FROM   dba_hist_sqlstat stat
       INNER JOIN dba_hist_snapshot snap
               ON stat.snap_id = snap.snap_id
WHERE  stat.sql_id = 'bpkgrf2awmhfc'
AND    snap.begin_interval_time BETWEEN sysdate - 8 AND sysdate
ORDER  BY snap.snap_id DESC;






select sql_id, sql_text from dba_hist_sqltext where upper(sql_text) like '%CUSTOMERS%';


SELECT tf.* FROM DBA_HIST_SQLTEXT ht, TABLE (DBMS_XPLAN.DISPLAY_AWR(ht.sql_id,NULL, NULL,
'ALL' )) tf WHERE ht.sql_id='&1'





1) select DBID, SQL_ID, SQL_TEXT from dba_hist_sqltext where sql_text like '%your query%';

-->> or see all available sql

-- Compare two AWR snapshots and list every SQL_ID whose plan hash value
-- differs between them (i.e. statements whose plan changed).
variable snap_id_1 number;
variable snap_id_2 number;
exec :snap_id_1 := 789;
exec :snap_id_2 := 786;
SELECT first_snap.sql_id,
       first_snap.plan_hash_value  AS snap_id_1_plan,
       second_snap.plan_hash_value AS snap_id_2_plan
FROM   dba_hist_sqlstat first_snap
       INNER JOIN dba_hist_sqlstat second_snap
               ON first_snap.sql_id = second_snap.sql_id
WHERE  first_snap.snap_id  = :snap_id_1
AND    second_snap.snap_id = :snap_id_2
AND    first_snap.plan_hash_value != second_snap.plan_hash_value
ORDER  BY first_snap.sql_id;


select * from V$SQL_SHARED_CURSOR where sql_id='32927hsaftay0';


--> see sql whoes plan has change

-- For every SQL_ID that has more than one distinct plan in AWR, show each
-- plan_hash_value together with the FIRST snapshot in which that plan was
-- seen (and that snapshot's begin time) -- i.e. when each plan change
-- happened. Innermost query counts distinct plans per sql_id (a);
-- middle query tags each (sql_id, plan) pair with its earliest snap_id via
-- FIRST_VALUE (f); outer query keeps only sql_ids with a > 1 plans and
-- looks up the snapshot date.
select distinct sql_id, plan_hash_value, f snapshot,
                (select begin_interval_time
                   from dba_hist_snapshot
                  where snap_id = f) snapdate
           from (select sql_id, plan_hash_value,
                        first_value (snap_id) over (partition by sql_id, plan_hash_value
order by snap_id) f
                   from (select   sql_id, plan_hash_value, snap_id,
                                  count (distinct plan_hash_value) over (partition by
sql_id) a
                             from dba_hist_sqlstat
                            where plan_hash_value > 0
                         order by sql_id)
                  where a > 1)
       order by sql_id, f;


-- SQL_IDs that have more than one distinct (non-zero) plan recorded in AWR:
-- candidates for plan-instability investigation.
-- Fixed: added the missing statement terminator and normalized the
-- haphazard Init-Caps keyword casing.
SELECT sql_id, COUNT(*)
FROM   (SELECT DISTINCT sql_id, plan_hash_value
        FROM   dba_hist_sqlstat
        WHERE  plan_hash_value <> 0)
GROUP  BY sql_id
HAVING COUNT(*) > 1;



2)  get plan hash value :

   select   snap_id, plan_hash_value
    from dba_hist_sqlstat
    where snap_id in (789, 786) and sql_id = 'dcxm4kr1urg93'
    order by snap_id desc;

or

-- AWR plan history for one SQL_ID over the last 2 days (edit dbid/sql_id).
-- Fixed: the original wrapped the sql_id literal in curly quotes
-- (word-processor paste), which is invalid SQL and raises ORA-00911.
select s.begin_interval_time, s.end_interval_time , q.snap_id, q.dbid, q.sql_id,
q.plan_hash_value, q.optimizer_cost, q.optimizer_mode
from dba_hist_sqlstat q, dba_hist_snapshot s
where q.dbid = 1234567890 and q.sql_id = '12345xxxxxxxx'
and q.snap_id = s.snap_id
and s.begin_interval_time between sysdate-2 and sysdate
order by s.snap_id desc;


3) compare explain plan of different hash value using below query :

-- Plan steps for ONE specific plan of the statement; run once per
-- plan_hash_value and diff the outputs to compare the plans.
-- Fixed: curly quotes around the sql_id literal (invalid SQL), the raw
-- <plan_hash_value1> placeholder (now a SQL*Plus substitution variable),
-- and the missing statement terminator.
select id, operation, options, object_name, cost
from dba_hist_sql_plan
where dbid = 1234567890 and sql_id = '12345xxxxxxxx'
and plan_hash_value = &plan_hash_value1;


select * from table(dbms_xplan.display_awr('32927hsaftay0',714092912));
select * from table(dbms_xplan.display_awr('dcxm4kr1urg93',1582437882));




-- AWR plan-stability report: per-snapshot executions, average elapsed time
-- and average logical I/O for every plan of one SQL_ID. A jump in avg_etime
-- that coincides with a new plan_hash_value points at a plan change.
set lines 155
col execs for 999,999,999
col avg_etime for 999,999.999
col avg_lio for 999,999,999.9
col begin_interval_time for a30
col node for 99999
break on plan_hash_value on startup_time skip 1
select ss.snap_id,
       ss.instance_number node,
       begin_interval_time,
       sql_id,
       plan_hash_value,
       nvl(executions_delta,0) execs,
       -- average elapsed seconds per execution (decode guards divide-by-zero)
       (elapsed_time_delta/decode(nvl(executions_delta,0),0,1,executions_delta))/1000000
       avg_etime,
       -- average buffer gets per execution
       -- FIXED: the zero-guard must test EXECUTIONS_DELTA (the divisor), not
       -- buffer_gets_delta as the original did; harmless here only because of
       -- the executions_delta > 0 predicate below, but wrong as written.
       (buffer_gets_delta/decode(nvl(executions_delta,0),0,1,executions_delta)) avg_lio
from DBA_HIST_SQLSTAT S, DBA_HIST_SNAPSHOT SS
where sql_id = nvl('&sql_id','b55nnb6ykd2tr')
and ss.snap_id = S.snap_id
and ss.instance_number = S.instance_number
and executions_delta > 0
order by ss.snap_id, ss.instance_number, begin_interval_time
/









SPOOL coe_gen_sql_profile.log;
SET DEF ON TERM OFF ECHO ON FEED OFF VER OFF HEA ON LIN 2000 PAGES 100 LONG 8000000 LONGC
800000 TRIMS ON TI OFF TIMI OFF SERVEROUT ON SIZE 1000000 NUMF "" SQLP SQL>;
SET SERVEROUT ON SIZE UNL;
REM
REM $Header: 215187.1 coe_gen_sql_profile.sql 11.4.3.4 2011/07/20 carlos.sierra $
REM
REM Copyright (c) 2000-2011, Oracle Corporation. All rights reserved.
REM
REM AUTHOR
REM carlos.sierra@oracle.com
REM
REM SCRIPT
REM coe_gen_sql_profile.sql
REM
REM DESCRIPTION
REM Inputs one SQL_ID and outputs a set of scripts generated by
REM coe_xfr_sql_profile.sql. One for each known plan of given
REM SQL_ID.
REM
REM PRE-REQUISITES
REM 1. Oracle Tuning Pack license.
REM
REM PARAMETERS
REM 1. SQL_ID (required)
REM
REM EXECUTION
REM 1. Connect into SQL*Plus as SYSDBA or user with access to
REM data dictionary.
REM 2. Execute script coe_gen_sql_profile.sql passing SQL_ID
REM (parameter can be passed inline or until requested).
REM
REM EXAMPLE
REM # sqlplus system
REM SQL> START coe_gen_sql_profile.sql [SQL_ID];
REM SQL> START coe_gen_sql_profile.sql gnjy0mn4y9pbm;
REM SQL> START coe_gen_sql_profile.sql;
REM
REM NOTES
REM 1. For possible errors see coe_gen_sql_profile.log
REM 2. Be aware that using DBMS_SQLTUNE requires a license for
REM Oracle Tuning Pack.
REM
SET TERM ON ECHO OFF;
PRO
PRO Parameter 1:
PRO SQL_ID (required)
PRO
DEF sql_id0 = '&1';
PRO
SET TERM OFF;
PRO
PRO Plans from GV$SQL
PRO ~~~~~~~~~~~~~~~~~

-- Per-plan average statistics for &&sql_id0 from the cursor cache (GV$SQL):
-- elapsed/CPU/IO seconds, buffer gets, disk reads and rows, each averaged
-- per execution, grouped by plan_hash_value. Best (fastest) plans sort first.
SELECT plan_hash_value,
ROUND(SUM(elapsed_time)/SUM(executions)/1e6, 3) avg_et_secs,
ROUND(SUM(cpu_time)/SUM(executions)/1e6, 3) avg_cpu_secs,
ROUND(SUM(user_io_wait_time)/SUM(executions)/1e6, 3) avg_io_secs,
ROUND(SUM(buffer_gets)/SUM(executions)) avg_buffer_gets,
ROUND(SUM(disk_reads)/SUM(executions)) avg_disk_reads,
ROUND(SUM(rows_processed)/SUM(executions)) avg_rows,
SUM(executions) executions
FROM gv$sql
WHERE sql_id = TRIM('&&sql_id0.')
AND executions > 0
GROUP BY
plan_hash_value
ORDER BY
2, 3;
PRO
PRO
PRO Plans from DBA_HIST_SQLSTAT
PRO ~~~~~~~~~~~~~~~~~~~~~~~~~~~

-- Same per-plan averages as the GV$SQL query above, but from AWR history
-- (DBA_HIST_SQLSTAT *_total columns), so plans aged out of the cursor
-- cache are still reported.
SELECT plan_hash_value,
ROUND(SUM(elapsed_time_total)/SUM(executions_total)/1e6, 3) avg_et_secs,
ROUND(SUM(cpu_time_total)/SUM(executions_total)/1e6, 3) avg_cpu_secs,
ROUND(SUM(iowait_total)/SUM(executions_total)/1e6, 3) avg_io_secs,
ROUND(SUM(buffer_gets_total)/SUM(executions_total)) avg_buffer_gets,
ROUND(SUM(disk_reads_total)/SUM(executions_total)) avg_disk_reads,
ROUND(SUM(rows_processed_total)/SUM(executions_total)) avg_rows,
SUM(executions_total) executions
FROM dba_hist_sqlstat
WHERE sql_id = TRIM('&&sql_id0.')
AND executions_total > 0
GROUP BY
plan_hash_value
ORDER BY
2, 3;
PRO
PRO
PRO Plans from GV$SQL and/or DBA_HIST_SQLSTAT
PRO ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-- For every plan of &&sql_id0 known to either the cursor cache or AWR
-- (only plans whose OTHER_XML is available, so an outline can be
-- extracted), print one "@@coe_xfr_sql_profile.sql <sql_id> <phv>" call,
-- ordered by average elapsed time -- fastest plan first. The generated
-- lines are later spooled into coe_gen_sql_profile_<sql_id>.sql and run.
BEGIN
FOR i IN (WITH
-- p: distinct plan hash values for this sql_id (memory UNION AWR)
p AS (
SELECT plan_hash_value
FROM gv$sql_plan
WHERE sql_id = TRIM('&&sql_id0.')
AND other_xml IS NOT NULL
UNION
SELECT plan_hash_value
FROM dba_hist_sql_plan
WHERE sql_id = TRIM('&&sql_id0.')
AND other_xml IS NOT NULL ),
-- m: average elapsed microseconds per execution from the cursor cache
m AS (
SELECT plan_hash_value,
SUM(elapsed_time)/SUM(executions) avg_et_secs
FROM gv$sql
WHERE sql_id = TRIM('&&sql_id0.')
AND executions > 0
GROUP BY
plan_hash_value ),
-- a: average elapsed microseconds per execution from AWR history
a AS (
SELECT plan_hash_value,
SUM(elapsed_time_total)/SUM(executions_total) avg_et_secs
FROM dba_hist_sqlstat
WHERE sql_id = TRIM('&&sql_id0.')
AND executions_total > 0
GROUP BY
plan_hash_value )
-- prefer the cursor-cache timing; fall back to AWR (outer joins keep
-- plans known only to one of the two sources)
SELECT p.plan_hash_value,
ROUND(NVL(m.avg_et_secs, a.avg_et_secs)/1e6, 3) avg_et_secs
FROM p, m, a
WHERE p.plan_hash_value = m.plan_hash_value(+)
AND p.plan_hash_value = a.plan_hash_value(+)
ORDER BY
avg_et_secs NULLS LAST)
LOOP
DBMS_OUTPUT.PUT_LINE('-- phv:'||i.plan_hash_value||' avg_et_secs:'||i.avg_et_secs);
DBMS_OUTPUT.PUT_LINE('@@coe_xfr_sql_profile.sql &&sql_id0. '||i.plan_hash_value||';');
END LOOP;
END;
SPO coe_gen_sql_profile_&&sql_id0..sql;
/
SPO OFF;
@coe_gen_sql_profile_&&sql_id0..sql;
HOS zip -mT coe_gen_sql_profile_&&sql_id0 coe_gen_sql_profile.log coe_xfr_sql_profile.log
coe_gen_sql_profile_&&sql_id0..sql coe_xfr_sql_profile_&&sql_id0._*.sql
HOS zip -d coe_gen_sql_profile_&&sql_id0 coe_xfr_sql_profile.log
SET TERM ON;
HOS unzip -l coe_gen_sql_profile_&&sql_id0
UNDEFINE 1 sql_id0
CL COL
PRO
PRO coe_gen_sql_profile completed.






-------------------------------------------------------------------------------------------
Cost =  (#SRds * sreadtim +
         #MRds * mreadtim +
         #CPUCycles / cpuspeed ) / sreadtim

where:
#SRDs is the number of single block reads
#MRDs is the number of multi block reads
#CPUCycles is the number of CPU Cycles *)
sreadtim is the single block read time
mreadtim is the multi block read time
cpuspeed is the CPU cycles per second

CPUCycles includes CPU cost of query processing (pure CPU cost) and CPU cost of data retrieval (CPU cost of the buffer cache get).


assume that our average disk I/O delay is 15 milliseconds. 
A “cost” of 4 would mean that the optimizer expects the query to take roughly
60 milliseconds to complete.


-------------------------------------------------------------------
| Id  | Operation                    | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------
|   0 | SELECT STATEMENT             |         |     1 |    57 |     4   (0)| 00:00:01 |
|   1 |  NESTED LOOPS                |         |     1 |    57 |     4   (0)| 00:00:01 |
|*  2 |   TABLE ACCESS FULL          | EMP     |     1 |    37 |     3   (0)| 00:00:01 |
|   3 |   TABLE ACCESS BY INDEX ROWID| DEPT    |     1 |    20 |     1   (0)| 00:00:01 |
|*  4 |    INDEX UNIQUE SCAN         | PK_DEPT |     1 |       |     0   (0)| 00:00:01 |
--------------------------------------------------------------------


cost = number of single block reads
card = number of rows






##########################  11g   Automatic Plan Capture  #############################

When automatic plan capture is enabled, the system automatically creates and maintains the plan history for SQL statements using information provided by the optimizer. The plan history will include relevant information used by the optimizer to reproduce an execution plan, such as the SQL text, outline, bind variables, and compilation environment.

The initial plan generated for a SQL statement is marked as accepted for use by the optimizer, and represents both the plan history and  the SQL plan baseline. All subsequent plans will be included in the plan history, and those plans that are verified not to cause  performance regressions will be added to the SQL plan baseline during the SQL plan baseline evolution phase.

To enable automatic plan capture, set


There's a lot more to it than just the parameter. There's something called optimizer_capture_sql_plan_baselines, which is FALSE by default and should stay FALSE, else we have no control over the plans. Also, if it's set to TRUE, any repeating SQL would have its plan saved as a baseline.
Secondly, we have to load the proven plans manually using any of the following methods.

From cursor cache (using sqlid in the sqlarea)
creating a SQL tuning set and loading baselines from it. This gives the flexibility of adding and removing the SQLs from the tuning set.

From AWR as well.  You can use DBMS_SQLPLAN & DBMS_SPM packages. You can go through tuning guide or SQL pl/sql package reference for examples as to how to
use the procedures inside the packages.

You can also use OEM. Anything you do, basic thing is that the PLAN has to be the best one and that you’d want to lock in which turns out  to be a baseline that can be seen in the views like dba_sql_plan_baselines as fixed or enabled or whatever

Automatic plan capture will not occur for a statement if a stored outline exists for it and is enabled and the parameter use_stored_outlines is TRUE. In this case, turn on incremental capture of plans into an STS using the function capture_cursor_cache_sqlset() in the DBMS_SQLTUNE package. After you have collected the plans for your workload into the STS, manually create SQL plan baselines using the method described earlier. Then, disable the stored outlines or set use_stored_outlines to FALSE. From now on, SPM will manage your workload and stored outlines will not be used for those statements.

OPTIMIZER_USE_SQL_PLAN_BASELINES – must be TRUE


Oracle Performance Monitoring Basic


                                Oracle Performance Monitoring  Basic 


Lately few people have been requesting me to share something on Tuning Concepts .  Basically we might know 100 of  things but we must smartly know when to use them . Though i am still to explore more.. but it will surely help mid level dba's


I always prefer to OEM for everything .. It can be used for lot things we even think of .

If we talk about tuning . we can use OEM for  checking concurrency ,  generating reports ,  checking fragmentation , ruing segment advisory, checking session details and its wait  , historical performance etc.

Also, it's just last year I realized the importance of using past performance data.

If i have time i will share some of my important sql to share how to check, but in this topic let me share what needs to be checked and what should be your  target areas


Its very crucial for us have all our scripts really handy . Have seen many Dba dying to search proper
script to monitor the very last moment .
Here are area where one should looks for as first steps. It depends whether you are  targeting to
diagnose one session or whole database  performance  :

-- wait event , blocking , multiple plan hash value, check alert logs, check for recent generated  trace files in     dump area, last analyzed , stale stats ,
    fragmentation of table and index ,  check sql plan for fts   ,  check if table was initially analyzed that caused    performance change ,   ofcource from O/S check  resource utilization , O/S error logs, V$resource_limit ,
  check if any backups are running hampering performance ,  check if any smon recovery / rollback is       happening  ,  check for hit ratios, check if session i/o is changing , session last et call ,  check for parallel
operations and long opps ,  check database temp and undo utilization - check highest consumer ,
check archive generation rate ,  ask application guys if any changes went in ,  check listener log ,
check defunct process  on o/s ,  check sqlid  historical excution : many time application guys just fool with you calming sql was running fine ,but it never did.. so check for historical sql performance using dba_hist_sqlstat  , try generating session trace

-- check for database report...  there are lot of reports which 90% dba  dont even know of ..
 ideally people just check awr report... but i prefer  ash report.. specially ashrpti.sql  .. to concentrate  on     one session or sqlid .  some of reports that can help you are,,  ash , ashrpti ,,  awr  sql report, awr compare report, addm ,   awr/statspack ,  sqldiag report-- check on metalink ,



When it comes for recommendation and fixes 

-- if case of multiple plan:  try creating profiles and baselines ,  gathering stats , creating missing index ,
sga recommendation based on hit ratios,   flushing shared pool, purging sqlid ,   restoring old stats -- from 10g ,  try invalidating cursor ,





 In case of Rac : 

For checks : 

-- check  resource status , check cluster alert log, deamon logs ,  ocr and voting disk status ,  service status ,  check deamons status ,  check asm logs , o/s logs , 


1) CRS HOME :
   Ÿ  CRS ALERT LOG :  log/<hostname>/alert<nodename>.log
   Ÿ  CRS logs :  log/<hostname>/crsd/  --- > archived every 10mb
   Ÿ  CSS logs : log/<hostname>/cssd/  -- > archived every 20mb
   Ÿ  EVM logs : log/<hostname>/evmd+evm/log
   Ÿ  OPMN logs – opmn/logs
   Ÿ  resource specific logs – log/<hostname>/racg/
   Ÿ  cluster communication logs :  log
2) ORACLE_home :
  Ÿ  resource specific logs :  log/<hostname>/racg
  Ÿ  srvm logs --  log/<hostname>/client
  Ÿ  alert and other trace files : bdump/cdump/udump
  Ÿ  AWR/ statspack/ ash/ addm  of each node
  Ÿ  listener logs .
3)  ASM_logs : 
  Ÿ   alert logs and other trace files  : oracle_home/rdbms/log/ bdump/udump/cdump
4)   /etc/oracle/oprocd/  (node eviction)

*** Oprocd log locations:
In /etc/oracle/oprocd or /var/opt/oracle/oprocd depending on version/platform.
Note that oprocd only runs when no vendor clusterware is running or on Linux > 10.2.0.4




1) SRVM tracing : We enable srvm tracing by giving “ export SRVM_TRACE=TRUE  ” .  By doing so we enable tracing for srvctl,cvu ,VIPCA and gsdctl .

srvctl config srvpool

2) Debugging the resource as root user :
crsctl debug log module_name component:debugging_level
 module_name = crs , css , evm
level = 1 to 5
component :  “crsctl lsmodules module_name”

3) Resource debugging : crsctl debug log res  "ora.node1.vip:1" 

4) ./crsctl get log css "all"
./crsctl set log css CSSD:4

5) To change amount  of  ocr logging, edit the file CRS_Home/srvm/admin/ocrlog.ini.




cluvfy -h

cluvfy stage -pre crsinst -n node_list
cluvfy stage -post crsinst -n node_list [-verbose]
cluvfy comp clu


cluvfy comp scan -verbose
Cluvfy comp crs –n all –verbose
cluvfy comp ocr –n all            (to verify ocr )
cluvfy comp ocr -n all -verbose

setenv MYNODES node1,node3,node5  ||  cluvfy comp nodecon -n $MYNODES [-verbose]  || 
cluvfy comp nodecon -n node1,node2,node3 –i eth0 -verbose
cluvfy comp nodecon -n all -verbose
cluvfy comp nodecon -n rac1

STORAGE ::
cluvfy comp ssa  -n all -verbose    ( to confirm if storage is shared)
cluvfy comp ssa  -n all -s /dev/sda
cluvfy comp space  -n all -l  /home/dbadmin/products  –z 2G -verbose    (to confirm free space . does not support block or raw device)
cluvfy comp cfs -f /oradbshare –n all -verbose    ( check intigrity of cluster filesystem )



è  Commands useful in day to day  :

.  ./setSID_k +asm1
crs_stat | awk -F= '/NAME=/{n=$2}/TYPE=/{t=$2}/TARGET=/{g=$2}/STATE=/{s=$2; printf("%-45s%-15s%-10s%-30s\n", n,t,g,s)}'

Ÿ crs_stat | awk -F= '/NAME=/{n=$2}/TYPE=/{t=$2}/TARGET=/{g=$2}/STATE=/{s=$2; printf("%-45s%-15s%-10s%-30s\n", n,t,g,s)}'
Ÿ crs_stat -t –v
Ÿ . $CRS_HOME/bin/crsstat
Ÿ ./crs_stat –t                      (status of all resources ) ( as oracle user)
Ÿ . $CRS_HOME/bin/crsctl check crs   ( to deamon  status ) ( as oracle user)
Ÿ ps –ef | grep d.bin
Ÿ  ./crsctl stop crs                  ( to stop crs ) (with root user )
Ÿ  ./crsctl start  crs                ( to start  crs) ( with root user )
Ÿ crsctl start resource –all
Ÿ crsctl stop has
Ÿ crsctl start has
Ÿ crsctl stop crs –f
Ÿ crsctl enable  crs  -->  enable crs  on next startup
Ÿ crsctl disable crs 
It will disable crs from coming up after reboot (These functions update the file /etc/oracle/scls_scr/node_name/root/crsstart  that           contain the string enable or disable)
Ÿ crsctl query crs softwareversion
Ÿ crsctl query crs activeversion
Ÿ  crsctl check daemon .              (see infor of each deamon )
Ÿ  crsctl stop  cluster -all
Ÿ  crsctl check cluster –all
Ÿ crs_start ora.q-faykan.ons

Ÿ crsctl stop resource ora.QGISC.INS_WFC.QGISC2.srv       
Ÿ crsctl stop resource ora.QGISC.QGISC_INS_FGEA.QGISC2.srv
Ÿ crsctl stop resource ora.QGISC.QGISC_INS_WFC.QGISC2.srv 
Ÿ crsctl stop resource ora.QRDBC.DBR_SRVC.QRDBC2.srv      
Ÿ crsctl start resource ora.QGISC.INS_WFC.QGISC2.srv       
Ÿ crsctl start resource ora.QGISC.QGISC_INS_FGEA.QGISC2.srv
Ÿ crsctl start resource ora.QGISC.QGISC_INS_WFC.QGISC2.srv 
Ÿ crsctl start resource ora.QRDBC.DBR_SRVC.QRDBC2.srv      


Ÿ  export ORACLE_SID=orcl1
Ÿ ons debug                                   (ons ip debuging)
Ÿ olsnodes -s -t  / olsnodes -n      ( node information )
Ÿ  /runcluvfy.sh stage -post crsinst -n node1,node2 -verbose
Ÿ $ date; ssh q-feyd "date";ssh q-faykan "date"
Ÿ ps –fu oracle     -->  to check resource started by oracle
Ÿ srvctl getenv database -d orcl
Ÿ srvctl getenv instance -d orcl -i orcl1
Ÿ Crs_getper
Ÿ Crs_setperm
Ÿ  crs_profile –print                 ( to see crs profile .)
Ÿ  crsctl start res -all
Ÿ  crsctl stop resource ora.QRDBC.DBR_SRVC.QRDBC2.srv    
Ÿ  crsctl stat res -t -init
Ÿ  crsctl start res ora.evmd -init
Ÿ  crsctl stop res ora.evmd -init -f
Ÿ  crsctl modify resource "ora.cssd" -attr "AUTO_START=1"
Ÿ crsctl delete serverpool msinqprd -f
+QUEUE_IN_DATA/msinqprd/spfilemsinqprd.ora -n msinqprd -a QUEUE_IN_DATA,QUEUE_IN_FLASHBACK

  crsctl (check / start / stop ) cluster –all  -- 11g


Ÿ srvctl config network
Ÿ srvctl config nodeapps
Ÿ srvctl modify database –dbname –y automatic/manual  (automatic start/stop resource )
Ÿ srvctl status database -d wwprd
Ÿ srvctl status nodeapps -n wwprod3
Ÿ srvctl status asm –n node_name
Ÿ srvctl start nodeapps -n green -4  ( starts  ons, listener , gsd, vip  )
Ÿ srvctl start asm -n green –i asm1
Ÿ srvctl start instance –d orcl –i orcl1
Ÿ srvctl remove database -d databasename ( -f)
Ÿ srvctl start database -d dctmdb
Ÿ srvctl start listener -n node_name
Ÿ srvctl stop database -d dctmdb  -o immediate
Ÿ srvctl start database -d dctmdb –i dctmdb1 –o mount
Ÿ srvctl stop instance –d orcl –i orcl1
Ÿ srvctl getenv database -d PBOKCC
Ÿ srvctl setenv database -d PBOKCC -t
Ÿ srvctl stop  asm -n green –i asm1 –o immediate
Ÿ srvctl stop nodeapps -n green -4
Ÿ srvctl remove instance -d DBM -i DBM8
Ÿ srvctl remove database -d DBM

srvctl add database -d msinqprd -o /oracle/product/11.2.0/db -p
srvctl add database -d BOSTON –o /opt/oracle/product/10g_db_rac
srvctl add instance -d BOSTON -i BOSTON1 -n boston_host1
srvctl add instance -d BOSTON -i BOSTON2 -n boston_host2
srvctl add database -d tststby -o
srvctl add instance -d tststby -i tststby2 -n raclinux2
srvctl add instance -d tststby -i tststby1 -n raclinux1
srvctl add service -d tststby -s tststby_s -r tststby2 -a tststby1
srvctl modify database -d tststby -p ’+DGDUP/spfiletststby.ora’
srvctl config database -d tststby
srvctl start instance -d orcl -i orcl1

srvctl status database –d ITDBSS
srvctl status service –d ITDBSS
srvctl status listener
srvctl status scan_listener
srvctl status scan
srvctl status nodeapps

srvctl remove/enable/disable/ asm -n node_name -i instance_name
srvctl start asm -n node_nmae -i instance_name

srvctl stop diskgroup -g DATA
srvctl [*] diskgroup -n'


srvctl add database -d JIRAP2_sldn0833por -n JIRAP2 -o /app/ora/local/product/11.2.0.2/db_1
srvctl disable database -d JIRAP2_sldn0833por
srvctl add listener -l LISTENER_JIRAP2 -o /app/ora/local/product/11.2.0.2/db_1 -p TCP:1552 -s
srvctl disable listener -l LISTENER_JIRAP2


Thursday, October 31, 2013

Oracle GOLDEN GATE

                                                        GOLDEN GATE



#################################################################
Commands
#################################################################

send EAUSFLD3 status
info EAUSFLD2 , showch
send EAUSFLD2 , showtrans
info EAUSFLD3 , detail

stats EAUSFLD3


--  Use the HISTORY command to display a list of previously executed commands.
--  Use the ! command to execute a previous command again without editing it.
--  Use the FC command to edit a previous command and then execute it again.


echo $GGBASE  -- $ORACLE_HOME/ggsci
connect /
dblogin userid gguser , password gguser


purgeoldextracts  -- parameter to purge old trail files (set in the parameter file)
list table *


info all
info mgr
info manager
info extract
info e01123 , showch  -- Show checkpoints
info extract e_cust
info extract e_cust, detail
info extract e_cust, showch
info extract *, tasks
info extract *, allprocesses
info extract ext1, detail

info e01123 , showch  -- Show checkpoints
info extract e_cust
info extract e_cust, detail
info extract e_cust, showch
info extract *, tasks
info extract *, allprocesses
info exttrail *
info trandata SCOTT.*


GGSCI (server) 6> shell pwd


KILL EXTRACT group name
LAG EXTRACT
lag extract ext*


view param mgr
edit param mgr

start mgr
start <group>
start *
stop er *
info er *
GGSCI> START EXTRACT <extract name>, DETAIL


add trandata amer.*
info trandata amer.*

-info all
–info mgr
–info <group>
–send <group>, status
–info <group>, showch# checkpoint info, with required arch log #

send <group>, status
send extract ext1 status
send extract ext2, getlag
send manager childstatus
SEND MANAGER GETPURGEOLDEXTRACTS


status manager
status extract ext1
stats extract ext2 reportrate hr

VIEW GGSEVT
view report ext1
view report rep1
view report mgr

SHOWTRANS 

–lag <group>
•Use LAG EXTRACT to determine a true lag time between Extract and the data source. Lag time is the difference, in seconds, between the time
a record was processed by Extract and the timestamp of that record in the data source.

•Use LAG REPLICAT to determine a true lag time between Replicat and the trail. Lag time is the difference, in seconds, between the time that the last record was processed by Replicatand the timestamp of the record in the trail.
–view params<group> (to view the parameter file)
–alter <group>, etrollover(to create/increment a new trail file)
–alter <group>, extseqno<#>, extrba<rba> (To designate a begin point in trail file)
–alter <group>, extseqno<#>, extrba1024 (To start from new archive or online logfiles. Oracle reserves 1024 bytes for file header info)
–alter <group>, begin now (Begins processing at the current time)
–alter <group>, begin 2003-12-25 (Begins processing to the specific datetime)
–refresh mgr(Enables to change the parameter without stopping and starting the Manager process. The exception is changing the port number)
–send manager, childstatus (Retrieves status information about processes started by Manager.)

–stats <group> (to display statistics for one or more groups.)




#################################################################
GG VIEWS
#################################################################



-- Report supplemental log groups per table, showing whether logging
-- is conditional or unconditional.
COLUMN LOG_GROUP_NAME HEADING 'Log Group' FORMAT A20
COLUMN TABLE_NAME HEADING 'Table' FORMAT A15
COLUMN ALWAYS HEADING 'Conditional or|Unconditional' FORMAT A14
COLUMN LOG_GROUP_TYPE HEADING 'Type of Log Group' FORMAT A20

SELECT
    LOG_GROUP_NAME,
    TABLE_NAME,
    DECODE(ALWAYS,
           'ALWAYS',      'Unconditional',
           'CONDITIONAL', 'Conditional') ALWAYS,
    LOG_GROUP_TYPE
FROM DBA_LOG_GROUPS;


select SUPPLEMENTAL_LOG_DATA_MIN from v$database;

                            
-- List sessions with open transactions across all RAC instances,
-- oldest transaction first (same predicates as comma-join form,
-- rewritten as explicit ANSI inner joins).
SELECT
    s.inst_id,
    s.sid,
    s.serial#,
    t.start_time,
    s.status
FROM gv$session s
INNER JOIN gv$transaction t
    ON  t.ses_addr = s.saddr
    AND t.inst_id  = s.inst_id
INNER JOIN gv$rollstat r
    ON  r.usn     = t.xidusn
    AND r.inst_id = t.inst_id
ORDER BY t.start_time;



/*
 Conn / as sysdba
 Col used format a8
 Prompt current_rba will show where redo is being written to

 Select  le.leseq  log_sequence#, cp.cpodr_bno * le.lebsz current_rba,
                         le.lesiz * le.lebsz log_size,
             substr(to_char(100 * cp.cpodr_bno / le.lesiz, '999.00'), 2) || '%'  used
     from    sys.x$kcccp  cp,
             sys.x$kccle  le
     where   le.inst_id = userenv('Instance') and
             cp.inst_id = userenv('Instance') and
             le.leseq = cp.cpodr_seq and le.leseq > 0
 */


#################################################################
Troubleshoot
#################################################################

main issues  :
gap , uncommitted transaction , gg abended ,  rep errors --  unique key , no transaction ,
checkpoint table corrupted , missing archives ,

--> info all command was not showing lag after etrollover . have to use "send extract <group>, status"



troubleshoot gap in case of missing archive :
-------------------------------------------------------
-- stop replication
-- export/import > refresh
-- start replication from that csn/time




FOR ERROR CHECK  : ggserr.log , tcperrs , view GGSEVT , ,logdump
Discard file , report file ,  sqlnet file ,  check if rba number is changing ,

Go to arch directory : fuser

replicate side : reperror 1401 discard , reperror 0001 discard ,

lsof -p <the process ID for one of the replicats you have running>  -- to check which process is writing to which file



-- ggserr.log: GG reports global level INFO, WARNING and ERROR messages in this file at GGBASE directory.

-- The Report file stores process level messages at dirrpt/<PROCESS_NAME>.rpt. Each process should have its own rpt file. ex. E0079.rpt.     New report file will be created each time the process starts and old one will be rolled-over. GG maintains 10 rolled-over files.

-- Discard file stores the vital RBA information, location of the problematic record within a transaction and record details. It also       saves all the discarded records, if discard flag (REPERROR) turn on. Normally discard file is located at          dirout/<PROCESS_NAME_PDB_RDB>.dsc

-- Always communicate with user during troubleshooting particularly when you planning to skip transaction. User has to approve before you     skip the transaction or a record.

-- The REPERROR flag skips all the subsequent records that encountered specific error until turned off. No way to skip only one specific     record using this flag.




Replication issues :

-- Most of the troubleshooting techniques due to data inconsistency between primary and target are similar to primary key issue that   covered in Alert 1. You can use the same tactics to resolve Alert 2 errors..
-- REPERROR flag discards records that encountered specific Oracle error and saves discarded details in discard file.
-- REPERROR skips only a problematic record/row; not entire transaction.

-- Most common REPERROR flags:
–REPERROR 0001 discard: Unique Constraint violation. Insert or update statement fails.
–REPERROR 1403 discard: No data found; update or delete statement fails.
–REPERROR 2292 discard: Integrity constraint violated - child record found. Update or delete fails.
–REPERROR 28117 discard: Integrity constraint violated -parent record not found. Insert or update fails.


Main issues on replicate :
Alert 1 –Primary Key Issue
Alert 2 –Data Inconsistency Errors
Alert 3 –Uncommitted transaction
Alert 4 –Lag Checkpoint Exceeded Limit
Alert 5 –Data not moving
Alert 6 –Incompatible Record Format     -- fixed from gg 8 – scanforheader



How to sync GG when it is out of sync :
-- stop gg
-- insert using dblink/export import
-- start gg -- with skip errors . remove skip errors when gg is in sync
or
-- export and start replicate .. ( good if started from csn number )



Consider MAXCOMMITPROPAGATIONDELAY parameter in extract, if extract abends on SCN error.
THREADOPTIONS MAXCOMMITPROPAGATIONDELAY 30000, IOLATENCY 3000



#################################################################
Skip  Transaction 
#################################################################



replicat paramfile dirprm/rtwflxd3.prm  skiptransaction

send extract oow_ex1, showtrans

send extract oow_ex1, skiptrans 5.28.1323 FORCE


send E_CBDS showtrans 688.6.2315775  file tran_688.6.2315775.dmp detail
SEND EXTRACT E_CBDS , SKIPTRANS 688.6.2315775 THREAD 1




#################################################################



#################################################################
Tracing
#################################################################

ggsci> send <rep_name> trace2 ./dirrpt/rep_name.trc
wait for 3-4 minutes
ggsci> send <rep_name> trace2 off --->> get the trace file in rep_name.trc


 “send Extract , trace off” for Extract, “send Replicat , trace off” for Replicat
send extract EXT1 trace /tmp/trace_me.trc



TRACE | TRACE2

Valid For

Extract and Replicat

Description

Use the TRACE and TRACE2 parameters to capture Extract or Replicat processing information to help reveal processing bottlenecks. Both support the tracing of DML and DDL.

Tracing also can be turned on and off by using the SEND EXTRACT or SEND REPLICAT command in GGSCI. See "SEND EXTRACT" or "SEND REPLICAT".

Contact Oracle Support for assistance if the trace reveals significant processing bottlenecks.

Default

No tracing

Syntax

TRACE | TRACE2
[, DDL[INCLUDE] | DDLONLY]
[, [FILE] file_name]
[, THREADS (threadID[, threadID][, ...][, thread_range[, thread_range][, ...])]
TRACE
Provides step-by-step processing information.

TRACE2
Identifies the code segments on which Extract or Replicat is spending the most time.

DDL[INCLUDE] | DDLONLY
(Replicat only) Enables DDL tracing and specifies how DDL tracing is included in the trace report.

DDL[INCLUDE]
Traces DDL and also traces transactional data processing. This is the default. Either DDL or DDLINCLUDE is valid.

DDLONLY
Traces DDL but does not trace transactional data.

[FILE] file_name
The relative or fully qualified name of a file to which Oracle GoldenGate logs the trace information. The FILE keyword is optional, but must be used if other parameter options will follow the file name, for example:

TRACE FILE file_name DDLINCLUDE
If no other options will follow the file name, the FILE keyword can be omitted, for example:

TRACE DDLINCLUDE file_name
THREADS (threadID[, threadID][, ...][, thread_range[, thread_range][, ...])
Enables tracing only for the specified thread or threads of a coordinated Replicat. Tracing is only performed for threads that are active at runtime.

threadID[, threadID][, ...]
Specifies a thread ID or a comma-delimited list of threads in the format of threadID, threadID, threadID.

[, thread_range[, thread_range][, ...]
Specifies a range of threads in the form of threadIDlow-threadIDhigh or a comma-delimted list of ranges in the format of threadIDlow-threadIDhigh, threadIDlow-threadIDhigh.

A combination of these formats is permitted, such as threadID, threadID, threadIDlow-threadIDhigh.

If the Replicat is in coordinated mode and TRACE is used with a THREADS list or range, a trace file is created for each currently active thread. Each file name is appended with its associated thread ID. This method of identifying trace files by thread ID does not apply when SEND REPLICAT is issued by groupname with threadID (as in SEND REPLICAT fin003 TRACE...) or when only one thread is specified with THREADS.

Contact Oracle Support for assistance if the trace reveals significant processing bottlenecks.

Examples

Example 1  
The following traces to a file named trace.trc. If this is a coordinated Replicat group, the tracing applies to all active threads.

TRACE /home/ggs/dirrpt/trace.trc
Example 2  
The following enables tracing for only thread 1. In this case, because only one thread is being traced, the trace file will not have a threadID extension. The file name is trace.trc.

TRACE THREADS(1) FILE ./dirrpt/trace.trc
Example 3  
The following enables tracing for threads 1,2, and 3. Assuming all threads are active, the tracing produces files trace001, trace002, and trace003.

TRACE THREADS(1-3) FILE ./dirrpt/trace.trc





#################################################################
Stopping process forcefully 

#################################################################

stop  extract  ehahk  !
stop  extract  ehahk    force 



#################################################################
Bounded Recovery 
#################################################################





Bounded Recovery is a component of Oracle GoldenGate’s Extract process checkpointing facility. It guarantees an efficient recovery after Extract stops for any reason, planned or unplanned, no matter how many open (uncommitted) transactions there were at the time that Extract stopped, nor how old they were. Bounded Recovery sets an upper boundary for the maximum amount of time that it would take for Extract to recover to the point where it stopped and then resume normal processing.


  • BR Begin Recovery Checkpoint:
    This is similar to Standard recovery checkpoint.
    This is the first file that would be required for recovery.
    Whole or parts of transactions are restored by BR from BR files.
    Manually deleting the BR files is not recommended.
  • BR End Recovery Checkpoint:
    The end of bounded recovery is where the extract will begin to process records normally from redo or archive logs.
    This is similar to standard current checkpoint


Bounded Recovery is new feature in OGG 11.1, this is how it works:

A transaction qualifies as long-running if it has been open longer than one Bounded Recovery interval, which is specified with the BRINTERVAL option of the BR parameter.

For example, if the Bounded Recovery interval is four hours, a long-running open transaction is any transaction that started more than four hours ago.

At each Bounded Recovery interval, Extract makes a Bounded Recovery checkpoint, which persists the current state and data of Extract to disk, including the state and data (if any) of long-running transactions. If Extract stops after a Bounded Recovery checkpoint, it will recover from a position within the previous Bounded Recovery interval or at the last Bounded Recovery checkpoint, instead of processing from the log position where the oldest open long-running transaction first appeared, which could be several trail files ago.

 The BR checkpoint information is shown in the SHOWCH output starting with OGG v11.1.1.1



Bounded Recovery is enabled by default for Extract processes and has a 4 hour BR interval. To adjust the BR interval to say 24 hours, use the following syntax in your Extract parameter file:

BR BRINTERVAL 24, BRDIR BR



The default location for BR checkpoint files is the GoldenGate home directory. This can be altered by including a full path:

BR BRINTERVAL 24, BRDIR /ggsdata/brcheckpoint



Manually create  checkpoint : recommended before stopping extract 

send extsha1, br brcheckpoint immediate






#################################################################
Golden Gate  in case of  Dataguard   switchover / Failover 
#################################################################


We only need to ensure there is no lag and a checkpoint is done before stopping.
Move the trail files to the DR site. Get more information from the Oracle docs below.

1323670.1    Best Practice - Oracle GoldenGate and Oracle Data Guard - Switchover/Fail-over                                   Operations

1322547.1    Best Practice - Oracle GoldenGate and Oracle Data Guard - Switchover/Fail-over                                     Operations  for GoldenGate


Controlled DR Tests Using Dataguard with Goldengate in the Mix (Doc ID 1672938.1)






#################################################################
Golden Gate  in case of  Flashback 
#################################################################




If the database is flashed back, we need to reset the GG capture process accordingly. Refer to Doc ID 1626736.1 for more information.


SQL> select min (OLDEST_FLASHBACK_TIME) from v$flashback_database_log;
MIN(OLDEST_FLASHB
-----------------
26-FEB-2017 07:25


ggsci
stop replicat CSCURPCH;


SHUTDOWN IMMEDIATE;
STARTUP MOUNT;
flashback database to timestamp TO_TIMESTAMP('2017-02-26 08:45:00','YYYY-MM-DD HH24:MI:SS');
ALTER DATABASE OPEN READ ONLY;
SHUTDOWN IMMEDIATE
STARTUP MOUNT
ALTER DATABASE OPEN RESETLOGS;

ggsci
alter replicat CSCURPCH, begin 2017-02-26 08:45:00
start replicat CSCURPCH




#################################################################
Tables without  primary Key  --> KEYCOLS 
#################################################################


Normally we cannot add tables that have no primary key, or whose primary key is disabled. Also, some wanted columns may not be part of the primary key. In these scenarios we can use KEYCOLS.



TABLE  OWNER.table_name ,KEYCOLS (column1,column2);


Refer Doc ID 1271578.1   for more detail 





#################################################################
Exception and Error Handling 
#################################################################



=>   Defining    reperror

REPERROR (DEFAULT, EXCEPTION)
REPERROR (DEFAULT2, ABEND)
REPERROR (-1, EXCEPTION)




=>  Creating exception table

Doc ID 1382092.1)

GoldenGate does not provide a standard exceptions handler. By default, a Replicat process will abend should any operational failure occur, and will rollback the transaction to the last known checkpoint. This may not be ideal in a production environment.
The HANDLECOLLISIONS and NOHANDLECOLLISIONS parameters can be used to control whether or not Replicat tries to resolve duplicate-record and missing-record errors, but should these errors be ignored?
To determine what error has occurred, by which Replicat, caused by what data, create an Exceptions handler that will trap and log the specified Oracle error(s), but allow the Replicat to continue to process data.

Here is an example
drop table ggddlusr.exceptions
/
create table ggddlusr.exceptions
(rep_name varchar2(8),
table_name varchar2(61),
errno number,
dberrmsg varchar2(4000),
optype varchar2(20),
errtype varchar2(20),
logrba number,
logposition number,
committimestamp timestamp)
/

replicat r1
SETENV (ORACLE_HOME="/oracle/software/rdbms/11.2.0.2")
SETENV (ORACLE_SID="test")
SETENV (NLS_LANG = "AMERICAN_AMERICA.UTF8")
userid ggddlusr,password test

--Start of the Macro
MACRO #exception_handler
BEGIN
, TARGET ggddlusr.exceptions
, colmap ( rep_name = "R1"
, table_name = @GETENV ("GGHEADER", "TABLENAME")
, errno = @GETENV ("LASTERR", "DBERRNUM")
, dberrmsg = @GETENV ( "LASTERR", "DBERRMSG")
, optype = @GETENV ( "LASTERR", "OPTYPE")
, errtype = @GETENV ( "LASTERR", "ERRTYPE")
, logrba = @GETENV ( "GGHEADER", "LOGRBA")
, logposition = @GETENV ( "GGHEADER", "LOGPOSITION")
, committimestamp = @GETENV ( "GGHEADER", "COMMITTIMESTAMP") )
, INSERTALLRECORDS
, EXCEPTIONSONLY;
END;
--End of the Macro

Reportcount every 30 Minutes, Rate
Report at 01:00
ReportRollover at 01:15
discardfile /oracle/software/goldengate/11.1.1.1/dirrpt/r1.dsc, megabytes 50, append
DDL INCLUDE MAPPED &
EXCLUDE OBJNAME "GGDDLUSR.EXCEPTIONS"
AssumeTargetDefs
REPERROR (DEFAULT, EXCEPTION)
REPERROR (DEFAULT2, ABEND)
Map pubs.*, target pubs.*;

MAP pubs.* #exception_handler()




#################################################################
Golden Gate  in Rac Environment
#################################################################

Apart from the normal GG installation, for a RAC GG installation the two main tasks are to decide on a cluster filesystem and to add GG to the cluster as an application resource.

Also in Rac  configuration we need to move  dirpcs  to shared  drive / distributed file system environment   like dirprm , dirdat ,  dirchk  ,  dirtmp  , dirrpt .     Goldengate instance will be active on one node and in case of a node reboot or node eviction or node is down for a scheduled maintenance, the Goldengate process should automatically fail over to the other surviving nodes. 


>> Extract can only run against one instance

>>  If instance fails,
Manager must be stopped on failed node:
Manager and extract must be started on a surviving node

>> Failover can be configured in Oracle Grid  Infrastructure



Not covering ACFS creation in this blog

Adding GG  in  Cluster as application resource :

For adding GG  resource to cluster we need to first  create application vip resource .

Update the below vip in the /etc/hosts file on both the nodes , ( the vip should be on the same 
subnet of the public ip).


--> add Vip

cd /optware/grid/11.2.0.4/bin
./appvipcfg create -network=1 -ip=<x.x.x.x> -vipname=pggs-vip -user=root
./crsctl setperm resource pggs-vip -u user:oracle:r-x

 cd /optware/grid/11.2.0.4/bin
./crsctl status resource pggs-vip -t
./crsctl start resource pggs-vip -c  hostname  
./crsctl status resource pggs-vip -t




--> deploy agent script 

Oracle Clusterware runs resource-specific commands through an entity called an agent.
The agent script must be able to accept 5 parameter values: start, stop, check, clean and abort (optional).


Now we will create a script and place it in a shared location; here we have placed the script under the GG home, which can be accessed from both nodes. (This is the sample script provided by Oracle; we can also have a customized script as per our requirements.)


GG_HOME
mkdir crs_gg_script
cd crs_gg_script
vi gg_action.scr
chmod 750 gg_action.scr

cd /optware/grid/11.2.0.4/bin/
ls -ltr  /ggdata/csgg/crs_gg_script/gg_action.scr


#!/bin/sh
# goldengate_action.scr
#
# Oracle Clusterware action script for the GoldenGate manager resource.
# Clusterware invokes it with exactly one argument:
#   start | stop | check | clean | abort
# Exit status 0 means success / resource healthy; non-zero means failure.
. ~oracle/.bash_profile
[ -z "$1" ] && echo "ERROR!! Usage $0 " && exit 99

GGS_HOME=/golden_gate
# Delay (seconds) after "start manager" before verifying it came up.
start_delay_secs=5

# Include the Oracle GoldenGate home in the library path to start GGSCI.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${GGS_HOME}
# Set the Oracle home to the database so Oracle GoldenGate gets the
# right environment settings to be able to connect to the database.
export ORACLE_HOME=/u01/app/oracle/product/11.2/db
export CRS_HOME=/grid/11.2
# Set NLS_LANG explicitly, otherwise it defaults to US7ASCII.
export NLS_LANG=AMERICAN_AMERICA.AL32UTF8

logfile=/tmp/crs_gg_start.log

###########################
# log: write a timestamped message to stdout and append it to $logfile.
# (POSIX function syntax; the bash-only "function" keyword was dropped
# because the script runs under #!/bin/sh.)
###########################
log () {
    DATETIME=`date +%d/%m/%y-%H:%M:%S`
    echo $DATETIME "goldengate_action.scr>>" $1
    echo $DATETIME "goldengate_action.scr>>" $1 >> $logfile
}

# check_process: validate that a manager process is running at the PID
# that Oracle GoldenGate records in dirpcs/MGR.pcm.
# Exits 0 when manager is healthy, 1 otherwise.
check_process () {
    dt=`date +%d/%m/%y-%H:%M:%S`
    if [ -f "${GGS_HOME}/dirpcs/MGR.pcm" ]
    then
        pid=`cut -f8 "${GGS_HOME}/dirpcs/MGR.pcm"`
        # Quoted comparison: the original unquoted test raised a shell
        # syntax error when ps returned nothing (or several words).
        # NOTE(review): grep on the bare PID can match substrings of other
        # PIDs; the trailing "grep mgr" narrows it to manager processes.
        if [ "${pid}" = "$(ps -e | grep "${pid}" | grep mgr | awk '{ print $1 }')" ]
        then
            # Manager process is running on the recorded PID: success.
            echo $dt "manager process is running on the PID . exit success" >> /tmp/check.out
            exit 0
        else
            # Manager process is not running on the recorded PID.
            echo $dt "manager process is not running on the PID" >> /tmp/check.out
            exit 1
        fi
    else
        # No PID file, so manager is not running.
        # (Bug fix: the original wrote "$ dt", which printed a literal "$"
        # instead of the timestamp.)
        echo $dt "manager is not running because there is no PID file" >> /tmp/check.out
        exit 1
    fi
}

# call_ggsci: generic routine that executes a single GGSCI command and
# logs its output.
call_ggsci () {
    log "entering call_ggsci"
    ggsci_command=$1
    log "id= $USER"
    cd ${GGS_HOME}
    ggsci_output=`${GGS_HOME}/ggsci << EOF
${ggsci_command}
exit
EOF`
    log "got output of : $ggsci_output"
}

case $1 in
'start')
    # During failover, if "mgr.pcm" is not deleted at the node crash then
    # Oracle Clusterware will not start the manager on the new node,
    # assuming the manager process is still running on the failed node.
    # To get around this, delete the stale dirpcs process files, then
    # cycle the checkpoint files through a backup copy (to avoid any file
    # locking issue) before restoring them to dirchk and starting manager.
    log "removing *.pc* files from dirpcs directory..."
    cd $GGS_HOME/dirpcs
    rm -f *.pc*
    log "creating tmp directory to backup checkpoint file...."
    cd $GGS_HOME/dirchk
    mkdir tmp
    log "backing up checkpoint files..."
    cp *.cp* $GGS_HOME/dirchk/tmp
    log "Deleting checkpoint files under dirchk......"
    rm -f *.cp*
    log "Restore checkpoint files from backup to dirchk directory...."
    cp $GGS_HOME/dirchk/tmp/*.cp* $GGS_HOME/dirchk
    log "Deleting tmp directory...."
    rm -rf tmp
    log "starting manager"
    call_ggsci 'start manager'
    # There is a small delay between issuing the start manager command
    # and the process being spawned on the OS; wait before checking.
    log "sleeping for start_delay_secs"
    sleep ${start_delay_secs}
    # Check whether manager is running and exit accordingly.
    check_process
    ;;
'stop')
    # Attempt a clean stop for all non-manager processes.
    call_ggsci 'stop er *'
    # Ensure everything is stopped.
    call_ggsci 'stop er *!'
    # Stop manager without (y/n) confirmation.
    call_ggsci 'stop manager!'
    exit 0
    ;;
'check')
    check_process
    exit 0
    ;;
'clean')
    # Attempt a clean stop for all non-manager processes.
    call_ggsci 'stop er *'
    # Ensure everything is stopped.
    call_ggsci 'stop er *!'
    # In case there are lingering processes.
    call_ggsci 'kill er *'
    # Stop manager without (y/n) confirmation.
    call_ggsci 'stop manager!'
    exit 0
    ;;
'abort')
    # Ensure everything is stopped.
    call_ggsci 'stop er *!'
    # In case there are lingering processes.
    call_ggsci 'kill er *'
    # Stop manager without (y/n) confirmation.
    call_ggsci 'stop manager!'
    exit 0
    ;;
esac





--> add  gg  to cluster

./crsctl add resource ggateapp \
-type cluster_resource \
-attr "ACTION_SCRIPT= /ggdata/csgg/crs_gg_script/gg_action.scr, \
CHECK_INTERVAL=30, \
START_DEPENDENCIES='hard(pggs-vip,ora.asm) pullup(pggs-vip)',  \
STOP_DEPENDENCIES='hard(pggs-vip)'"

./crsctl status resource ggateapp -t
./crsctl start resource ggateapp -c  hostname
./crsctl status resource ggateapp -t



-->Testing

cd /optware/grid/11.2.0.4/bin
./crsctl relocate resource ggateapp -f
./crsctl status resource ggateapp -t
./crsctl status resource pggs-vip -t


cd /optware/grid/11.2.0.4/bin/
./crs_relocate -f ggateapp -f
./crsctl status resource ggateapp -t
./crsctl status resource pggs-vip -t






References :

https://all-database-soultions.blogspot.com/2022/11/install-and-configure-gg-monitoring.html