Saturday, July 31, 2021
Oracle Database Migration -- Transportable Tablespace
Friday, July 23, 2021
Oracle maintenance window -- Change scheduled Window time
In oracle Database 10g, we had only the first two automatic maintenance tasks mentioned below. With Oracle 11g, we have a third task name “Automatic SQL Tunning Advisor”
Automatic Optimizer Statistics Collection:
Automatic Segment Advisor:
Identifies segments that have space available for reclamation, and makes recommendations on how to de-fragment those segments.
Automatic SQL Tuning Advisor :
Examines the performance of high-load SQL statements, and makes recommendations on how to tune those statements.
To view the task, run the below SQL
SQL> select client_name from DBA_AUTOTASK_CLIENT;
CLIENT_NAME
----------------------------------------------------------------
auto optimizer stats collection
auto space advisor
sql tuning advisor
SELECT client_name, status, consumer_group, window_group
FROM dba_autotask_client
ORDER BY client_name;
CLIENT_NAME STATUS
---------------------------------------------------------------- --------
auto optimizer stats collection ENABLED
auto space advisor ENABLED
sql tuning advisor ENABLED
set line 400
col window_name format a20
select window_name,window_next_time from DBA_AUTOTASK_WINDOW_CLIENTS;
select window_name, REPEAT_INTERVAL, DURATION, NEXT_START_DATE from DBA_SCHEDULER_WINDOWS;
WINDOW_NAME WINDOW_NEXT_TIME
------------------------------ --------------------------------------
MONDAY_WINDOW 14-MAY-12 10.00.00.000000 PM EST5EDT
TUESDAY_WINDOW 08-MAY-12 10.00.00.000000 PM EST5EDT
WEDNESDAY_WINDOW 09-MAY-12 10.00.00.000000 PM EST5EDT
THURSDAY_WINDOW 10-MAY-12 10.00.00.000000 PM EST5EDT
FRIDAY_WINDOW 11-MAY-12 10.00.00.000000 PM EST5EDT
SATURDAY_WINDOW 12-MAY-12 06.00.00.000000 AM EST5EDT
SUNDAY_WINDOW 13-MAY-12 06.00.00.000000 AM EST5EDT
TEST_WINDOW 09-MAY-12 05.00.00.000000 AM EST5EDT
DBMS_SCHEDULER.set_attribute (
name => 'MONDAY_WINDOW',
attribute => 'repeat_interval',
VALUE => 'freq=daily;byday=MON;byhour=14;byminute=0;bysecond=0');
END;
/
BEGIN
DBMS_SCHEDULER.set_attribute (
name => 'TUESDAY_WINDOW',
attribute => 'repeat_interval',
VALUE => 'freq=daily;byday=TUE;byhour=14;byminute=0;bysecond=0');
END;
/
BEGIN
DBMS_SCHEDULER.set_attribute (
name => 'WEDNESDAY_WINDOW',
attribute => 'repeat_interval',
VALUE => 'freq=daily;byday=WED;byhour=14;byminute=0;bysecond=0');
END;
/
BEGIN
DBMS_SCHEDULER.set_attribute (
name => 'THURSDAY_WINDOW',
attribute => 'repeat_interval',
VALUE => 'freq=daily;byday=THU;byhour=14;byminute=0;bysecond=0');
END;
/
BEGIN
DBMS_SCHEDULER.set_attribute (
name => 'FRIDAY_WINDOW',
attribute => 'repeat_interval',
VALUE => 'freq=daily;byday=FRI;byhour=14;byminute=0;bysecond=0');
END;
/
BEGIN
DBMS_SCHEDULER.set_attribute (
name => 'SATURDAY_WINDOW',
attribute => 'repeat_interval',
VALUE => 'freq=daily;byday=SAT;byhour=14;byminute=0;bysecond=0');
END;
/
BEGIN
DBMS_SCHEDULER.set_attribute (
name => 'SUNDAY_WINDOW',
attribute => 'repeat_interval',
VALUE => 'freq=daily;byday=SUN;byhour=14;byminute=0;bysecond=0');
END;
/
To create a maintenance window, you must create a Scheduler window and then add it to the window group MAINTENANCE_WINDOW_GROUP
The DBMS_SCHEDULER PL/SQL package provides the ADD_WINDOW_GROUP_MEMBER subprogram, which adds a window to a window group
BEGIN
dbms_scheduler.create_window(
window_name => 'TEST_WINDOW',
duration => numtodsinterval(1, 'hour'),
resource_plan => 'DEFAULT_MAINTENANCE_PLAN',
repeat_interval => 'FREQ=DAILY;BYHOUR=5;BYMINUTE=0;BYSECOND=0');
dbms_scheduler.add_window_group_member(
group_name => 'MAINTENANCE_WINDOW_GROUP',
window_list => 'TEST_WINDOW');
END;
DBMS_AUTO_TASK_ADMIN PL/SQL package can be used to do the following.
To enable the task:
BEGIN
dbms_auto_task_admin.disable(
client_name => 'sql tuning advisor',
operation => NULL,
window_name => NULL);
END;
To enable it
BEGIN
dbms_auto_task_admin.enable(
client_name => 'sql tuning advisor',
operation => NULL,
window_name => NULL);
END;
Reference :
How to Change The Automated Maintenance Tasks Weekend Schedule (Doc ID 2036557.1)
Sunday, July 18, 2021
Oracle Database Services - Types and Relation with load balancing advisor
srvctl add service -d QWIC -s CUHISTORY_SRVC -r QWIC1 -a QWIC2
srvctl add service -d db_unique_name -s service_name -r preferred_list [-a available_list]
srvctl config database -d ORCL
srvctl status service -d CCIHC
srvctl modify service -d PRDBC -s BATCHRPT_SRVC -n -i PRDBC2,PRDBC3 -a PRDBC
srvctl relocate service -d PRDBC -s BATCHRPT_SRVC -i PRDBC4 -t PRDBC2
crsctl relocate resource resource_name –n node_name
srvctl start service -d QGISC -s ora.QGISC.INS_FGEA.QGISC2.srv
srvctl start service -d PRDBC -s BATCHRPT_SRVC -i PRDBC3
srvctl start service –d pmpic –s pmpic_gen
srvctl stop service -d QGISC -s ora.QGISC.INS_FGEA.QGISC2.srv
srvctl stop service –d pmpic –s pmpic_gen
crsctl stop resource ora.QGISC.INS_WFC.QGISC2.srv
crsctl stop resource ora.QGISC.QGISC_INS_FGEA.QGISC2.srv
SELECT username, program, machine, service_name FROM gv$session;
select name,network_name,enabled , failover_method,failover_type ,failover_retries
,failover_delay from dba_services ;
select name,network_name,enabled ,goal,min_cardinality,max_cardinality from dba_services ;
select inst_id, name,network_name,blocked from gv$active_services ;
select service_id, name , goal from v$active_services;
show parameter service_name
SELECT sid, serial#,
client_identifier, service_name, action, module
FROM V$SESSION
select inst_id,service_id,name,network_name
from gv$services
order by 1,2;
pause
select inst_id,service_id,name,network_name
from gv$active_services
order by 1,2;
col name for a16
col network_name for a16
col failover_method for a16
col failover_type for a16
col service_name for a20
col stat_name for a35
pause see all services that are defined
select service_id,name,network_name,creation_date,
failover_type,failover_method
from dba_services;
select service_name,stat_name,value
from v$service_stats
order by service_name
/
select INST_ID,VALUE from gv$parameter where NAME='service_names';
alter system set service_names='CFEED_SRVC, BATCHRPT_SRVC,PRDBC.envoy.net, DBR_SRVC'
scope=memory sid='PRDBC2';
select inst_id, username, program , logon_time from gv$session where service_name =
'SLICER_SRVC' order by inst_id ;
lsnrctl services > ab.log
lsnrctl services | grep service2
srvctl status service -d PRDBC
srvctl config service -d PRDBC
srvctl modify service -d PRDBC -s BATCHRPT_SRVC -n -i PRDBC2,PRDBC3 -a PRDBC4
srvctl relocate service -d PRDBC -s BATCHRPT_SRVC -i PRDBC4 -t PRDBC2
srvctl start service -d PRDBC -s BATCHRPT_SRVC -i PRDBC3
Typically, there are two types of load balancing:
Connection Load Balancing (CLB)
Run-time Load Balancing (RTLB)
RTLB has come into existence from 10gR2 which can be either set to “SERVICE_TIME” or “THROUGHPUT”. CLB can be configured at Client-Side or at Server-Side. Of which, Server-Side load balancing is recommended and have better functionality over Client-Side.
When a service is created by default Connection Load Balancing is enabled and set to “LONG” and Run-Time Load Balancing is disabled. Irrespective of Connection Load Balancing Goal “SHORT” or “LONG” you can enable Runtime Load Balancing Goal to “SERVICE_TIME” or “THROUGHPUT”.
With Connection Load Balancing goal set to LONG, do not configure Run-time Load Balancing as it is only applicable to applications where next session or job starts only after the current one ends which is not practical. This is the reason you must have read that Run-time Load Balancing must be enabled with CLB goal set to SHORT.
[oracle@RAC1 ~]$ srvctl modify service -d RACDB -s LBASRV1 -B SERVICE_TIME
[oracle@RAC1 ~]$ srvctl config service -d RACDB -s LBASRV1
[oracle@RAC1 ~]$ srvctl modify service -d RACDB -s LBASRV1 -B THROUGHPUT
[oracle@RAC1 ~]$ srvctl config service -d RACDB -s
Disabling CLB on server-side is equivalent to disabling LBA (Load Balancing Advisory). You will not find an option to do it if you search in “srvctl” help.
Oracle document says that configuring GOAL to NONE will disable Load Balancing Advisory (LBA) on the service. Let us try doing it and see what exactly it is.
I will try to use DBMS_SERVICE package to modify this service and to disable LBA.
SQL> EXECUTE DBMS_SERVICE.MODIFY_SERVICE (service_name => ‘LBASRV1’, goal => DBMS_SERVICE.GOAL_NONE);
SQL> select GOAL,CLB_GOAL from dba_services where name=’LBASRV1′;
GOAL CLB_G
———— —–
NONE LONG
But as soon as we restart the service using srvctl GOAL has come back to THROUGHPUT.
[oracle@RAC1 ~]$ srvctl stop service -s LBASRV1 -d RACDB
[oracle@RAC1 ~]$ srvctl start service -s LBASRV1 -d RACDB
SQL> select GOAL,CLB_GOAL from dba_services where name=’LBASRV1′;
GOAL CLB_G
———— —–
THROUGHPUT LONG
select user_data from sys.sys$service_metrics_tab order by enq_time;
exec dbms_service.start_service('rdbprod.envoy.net');
exec dbms_service.stop_service('bb', dbms_service.all_instances)
dbms_service.create_service (
service_name =>'ABC' , network_name => 'src_net_name' ,
failover_type => DBMS_SERVICE.FAILOVER_TYPE_SELECT, -
failover_retries => 10, -
failover_delay => 1, -
) ;
dbms_service.start_service('abc',dbms_service.all_instances);
end ;
/
service_name => 'o11gr1', -
goal => DBMS_SERVICE.GOAL_THROUGHPUT, -
failover_method => DBMS_SERVICE.FAILOVER_METHOD_BASIC, -
failover_type => DBMS_SERVICE.FAILOVER_TYPE_SELECT, -
failover_retries => 10, -
failover_delay => 1, -
clb_goal => DBMS_SERVICE.CLB_GOAL_LONG);
BEGIN
DBMS_SCHEDULER.create_job_class(
job_class_name => 'OLTP_JOB_CLASS',
service => 'OLTP_SERVICE');
job_class_name => 'BATCH_JOB_CLASS',
service => 'BATCH_SERVICE');
END;
/
GRANT EXECUTE ON sys.oltp_job_class TO my_user;
GRANT EXECUTE ON sys.batch_job_class TO my_user;
SQL> select * from DBA_SCHEDULER_JOB_CLASSES;
SQL> exec DBMS_SCHEDULER.CREATE_JOB_CLASS (job_class_name => 'AUDITING_JOB_CLASS', service
=> 'AUDITING_SERVICE', comments => 'THIS IS AUDIT SERVICE JOB CLASS');
PL/SQL procedure successfully completed.
BEGIN
DBMS_SCHEDULER.create_job (
job_name => 'my_user.oltp_job_test',
job_type => 'PLSQL_BLOCK',
job_action => 'BEGIN NULL; END;',
start_date => SYSTIMESTAMP,
repeat_interval => 'FREQ=DAILY;',
job_class => 'SYS.OLTP_JOB_CLASS',
end_date => NULL,
enabled => TRUE,
comments => 'Job linked to the OLTP_JOB_CLASS.');
END;
/
EXEC DBMS_SCHEDULER.set_attribute('MY_BATCH_JOB', 'JOB_CLASS', 'BATCH_JOB_CLASS');
col NAME for a35
col NETWORK_NAME for a35
col FAILOVER_METHOD for a30
, aq_ha_notifications => true -
, failover_method => dbms_service.failover_method_basic -
, failover_type => dbms_service.failover_type_select -
, failover_retries =>180 -
, failover_delay =>5 -
)
;
procedure disconnect_session( service_name in varchar2);
procedure create_service( service_name in varchar2, network_name in varchar2, goal in number, dtp in boolean, aq_ha_notifications in boolean, failover_method in varchar2, failover_type in varchar2, failover_retries in number, failover_delay in number, clb_goal in number);
procedure modify_service( service_name in varchar2, goal in number, dtp in boolean, aq_ha_notifications in boolean, failover_method in varchar2, failover_type in varchar2, failover_retries in number, failover_delay in number, clb_goal in number);
procedure delete_service( service_name in varchar2);
procedure start_service( service_name in varchar2, instance_name in varchar2);
procedure stop_service( service_name in varchar2, instance_name in varchar2);
(DESCRIPTION =
(LOAD_BALANCE = ON)
(FAILOVER = ON)
(ADDRESS = (PROTOCOL = TCP)(HOST = server01)(PORT = 1521))
(ADDRESS = (PROTOCOL = TCP)(HOST = server02)(PORT = 1521))
(ADDRESS = (PROTOCOL = TCP)(HOST = server03)(PORT = 1521))
(CONNECT_DATA =
(SERVICE_NAME = OLTP_SERVICE)
(FAILOVER_MODE =
(TYPE = SELECT)
(METHOD = BASIC)
(RETRIES = 20)
(DELAY = 1)
)
)
)
(DESCRIPTION =
(LOAD_BALANCE = ON)
(FAILOVER = ON)
(ADDRESS = (PROTOCOL = TCP)(HOST = server01)(PORT = 1521))
(ADDRESS = (PROTOCOL = TCP)(HOST = server02)(PORT = 1521))
(ADDRESS = (PROTOCOL = TCP)(HOST = server03)(PORT = 1521))
(CONNECT_DATA =
(SERVICE_NAME = BATCH_SERVICE)
(FAILOVER_MODE =
(TYPE = SELECT)
(METHOD = BASIC)
(RETRIES = 20)
(DELAY = 1)
)
)
)
(DESCRIPTION =
(ADDRESS = (PROTOCOL = TCP)(HOST = rac1-vip.hclt.com)(PORT = 1521))
(ADDRESS = (PROTOCOL = TCP)(HOST = rac2-vip.hclt.com)(PORT = 1521))
(LOAD_BALANCE = no)
(CONNECT_DATA =
(SERVER = DEDICATED)
(SERVICE_NAME = devdb_oltp)
)
)
(DESCRIPTION =
(ADDRESS = (PROTOCOL = TCP)(HOST = rac1-vip.hclt.com)(PORT = 1521))
(ADDRESS = (PROTOCOL = TCP)(HOST = rac2-vip.hclt.com)(PORT = 1521))
(LOAD_BALANCE = yes)
(CONNECT_DATA =
(SERVER = DEDICATED)
(SERVICE_NAME = devdb_oltp)
(FAILOVER_MODE =
(TYPE = SELECT)
(METHOD = BASIC)
(RETRIES = 180)
(DELAY = 5)
)
)
)
Note for DBA TEAM: application teams use application specific RAC services to connect to the database.
Following are the tnsname entries they use SS indicates Shared Server and non SS means Dedicated Server.
MARGIN-PRD.world, MARGIN-SS-PRD.world, RISKVIEW-PRD.world,RISKVIEW-SS-PRD.world,
RISKCALC-PRD.world,RISKCALC-SS-PRD.world,MULTIVAL-PRD.world,MULTIVAL-SS-PRD.world,
VARMARGIN-PRD.world,VARMARGIN-SS-PRD.world,CMA-PRD.world,CMA-SS-PRD.world.
If application team complains about connection issue to the database first thing ensure
archive area is not full. Then try connecting using above tnsname entries from global tnsnames.
If you get error like listener currently does not know service specified then follow
below step to start the services :
1. check if both the instances are up and running.
2. check all the application specific RAC services are up and running as below : (This command can be run from any node)
xstmc021001por(oracle):MFXPRDI1:misc$ $GRID_HOME/bin/srvctl status service -d MFXPRD_xstmc021 -s "MARGIN,RISKVIEW,RISKCALC,MULTIVAL,VARMARGIN,CMA"
Service MARGIN is running on instance(s) MFXPRDI2
Service RISKVIEW is running on instance(s) MFXPRDI1
Service RISKCALC is running on instance(s) MFXPRDI1
Service MULTIVAL is running on instance(s) MFXPRDI1
Service VARMARGIN is running on instance(s) MFXPRDI1
Service CMA is running on instance(s) MFXPRDI1
3. If not then bring the services on specific instance as per above list.
Please note MARGIN service runs on MFXPRDI2 and rest all run on MFXPRDI1
eg, to bring up one service as
$GRID_HOME/bin/srvctl start service -d MFXPRD_xstmc021 -s CMA -i MFXPRDI1
to bring up all the services in one command
$GRID_HOME/bin/srvctl start service -d MFXPRD_xstmc021 -s "CMA,MULTIVAL,RISKCALC,RISKVIEW,VARMARGIN" -i MFXPRDI1
$GRID_HOME/bin/srvctl start service -d MFXPRD_xstmc021 -s MARGIN -i MFXPRDI2
4. Wait for sometime and Run listener services from both the nodes and check all the services are registered to listener with shared servers (dispatcher entries in listener services output).
TNS_ADMIN=$GRID_HOME/network/admin $GRID_HOME/bin/lsnrctl services listener_mfxprd
5. If not then check service_names parameter for both the instances. And ensure above services are properly registered.
If not then use below command to add services to service_name
ALTER SYSTEM SET service_names='CMA','RISKVIEW','RISKCALC','MULTIVAL','VARMARGIN','MFXPRD' SCOPE=BOTH SID='MFXPRDI1';
ALTER SYSTEM SET service_names='MARGIN','MFXPRD' SCOPE=BOTH SID='MFXPRDI2';
Measuring Performance by Service Using the Automatic Workload Repository
Services add a new dimension for performance tuning. With services, workloads are visible and measurable, and therefore resource consumption and wait times are attributable by application. Tuning by using "service and SQL" replaces tuning by "session and SQL" in the majority of systems where all sessions are anonymous and shared.
The AWR maintains performance statistics that include information about response time, throughput, resource consumption, and wait events for all services and work that a database performs. Oracle Database also maintains metrics, statistics, wait events, wait classes, and SQL-level traces for services. You can optionally augment these statistics by defining modules within your application to monitor certain statistics. You can also define the actions within those modules that business critical transactions should execute in response to particular statistical values.
/* Enabling Gather statiscitc on Service-Module-Action combination */
-- to display currently connected services, modules and actions
select SID, USERNAME, SERVICE_NAME, MODULE, ACTION from V$SESSION
where SERVICE_NAME in ('hrserv','oeserv')
-- service name and module name are mandatory
begin
DBMS_MONITOR.SERV_MOD_ACT_STAT_ENABLE(SERVICE_NAME => 'hrserv',
MODULE_NAME=>'PAYROLL',
ACTION_NAME => 'EXCEPTIONS PAY');
end;
/
-- gather stats for PAYROLL module and ACTION whose name is null
begin
DBMS_MONITOR.SERV_MOD_ACT_STAT_ENABLE(SERVICE_NAME => 'hrserv',
Page 489 Oracle DBA Code Examples
MODULE_NAME=>'PAYROLL',
ACTION_NAME => NULL);
end;
/
-- gather stats for PAYROLL module and All its ACTIONs
begin
DBMS_MONITOR.SERV_MOD_ACT_STAT_ENABLE(SERVICE_NAME => 'hrserv',
MODULE_NAME=>'PAYROLL',
ACTION_NAME => '###ALL_ACTIONS');
end;
/
-- to view enabled monitorings
-- types: SERVICE, SERVICE_MODULE, SERVICE_MODULE_ACTION
select A.AGGREGATION_TYPE, A.PRIMARY_ID , A.QUALIFIER_ID1 , A.QUALIFIER_ID2
from DBA_ENABLED_AGGREGATIONS a
-- to view gathered stats
select S.AGGREGATION_TYPE, S.SERVICE_NAME, S.MODULE, S.ACTION, N.CLASS,
decode(n.CLASS,
'1','User','2','Redo','4','Enqueue','8','Cache','16','OS','32','RAC','64','SQL
','128','Debug', N.CLASS) STAT_CLASS,
S.STAT_NAME, S.VALUE
from V$SERV_MOD_ACT_STATS s, V$STATNAME n
where S.STAT_ID = N.STAT_ID
order by N.CLASS, S.STAT_ID
-- call times and performance statistics views:
V$SERVICE_STATS
V$SERVICE_EVENTS
V$SERVICE_WAIT_CLASSES
V$SERVICEMETRIC
V$SERVICEMETRIC_HISTORY
/* To Disable Cumulative Stats */
-- stats will be removed from V$SERV_MOD_ACT_STATS
begin
DBMS_MONITOR.SERV_MOD_ACT_STAT_DISABLE(SERVICE_NAME => 'hrserv',
MODULE_NAME=>'PAYROLL',
ACTION_NAME => 'EXCEPTIONS PAY');
end;
/
/* Service Quality Statistics */
-- script from Oracle documentation
-- provides service quality statistics every five seconds
SET PAGESIZE 60 COLSEP '|' NUMWIDTH 8 LINESIZE 132 VERIFY OFF FEEDBACK OFF
COLUMN service_name FORMAT A20 TRUNCATED HEADING 'Service'
COLUMN begin_time HEADING 'Begin Time' FORMAT A10
COLUMN end_time HEADING 'End Time' FORMAT A10
COLUMN instance_name HEADING 'Instance' FORMAT A10
COLUMN service_time HEADING 'Service Time|mSec/Call' FORMAT 999999999
COLUMN throughput HEADING 'Calls/sec'FORMAT 99.99
BREAK ON service_name SKIP 1
SELECT
Page 490 Oracle DBA Code Examples
service_name
, TO_CHAR(begin_time, 'HH:MI:SS') begin_time
, TO_CHAR(end_time, 'HH:MI:SS') end_time
, instance_name
, elapsedpercall service_time
, callspersec throughput
FROM gv$instance i
, gv$active_services s
, gv$servicemetric m
WHERE s.inst_id = m.inst_id
AND s.name_hash = m.service_name_hash
AND i.inst_id = m.inst_id
AND m.group_id = 10
ORDER BY service_name , i.inst_id , begin_time ;
Reference :
https://docs.oracle.com/en/database/oracle/oracle-database/12.2/racad/workload-management-with-dynamic-database-services.html#GUID-095B67FB-3E3A-44BE-84A4-321174015A08
https://docs.oracle.com/database/121/RACAD/GUID-559FB230-857B-4D97-B36A-F4F76C3A1D47.htm#RACAD7127
Saturday, July 10, 2021
Oracle HangAnalyze and SystemState Dump
Let see some insight . Below is how i personally capture hanganalyze and system state dump . Will explain in detail for each component in trail blog .
Collection commands for Hanganalyze and Systemstate: RAC with fixes for bug 11800959 and bug 11827088
For 11g:
Sqlplus '/as sysdba'
Oradebug setorapname reco
Oradebug unlimit
Oradebug-g all hanganalyze 3
Oradebug-g all dump systemstate 267
oradebug tracefile_name
…….. Wait at least 1 min
Oradebug -g all hanganalyze 3
Oradebug-g all dump systemstate 267
oradebug tracefile_name
Exit
Collection commands for Hanganalyze and Systemstate: RAC without fixes for Bug 11800959 and Bug 11827088
Sqlplus '/as sysdba'
Oradebug setorapname reco
Oradebug unlimit
Oradebug-g all hanganalyze 3
Oradebug-g all dump systemstate 258
…….. Wait at least 1 min
Oradebug-g all hanganalyze 3
Oradebug-g all dump systemstate 258
Exit
For 10g, run oradebug setmypid instead of oradebug setorapname reco:
Sqlplus '/as sysdba'
Oradebug setmypid
Oradebug unlimit
Oradebug-g all hanganalyze 3
Oradebug-g all dump systemstate 258
…….. Wait at least 1 min
Oradebug-g all hanganalyze 3
Oradebug-g all dump systemstate 258
Exit
In RAC environment, a dump will be created for all RAC instances in the DIAG trace file for each instance.
##############################################
##############################################
________________________
sqlplus -prelim / as sysdba
sqlplus /nolog
set _prelim on
connect / as sysdba
##############################################
##############################################
SQL> oradebug setmypid
SQL> oradebug unlimit
SQL> oradebug dump systemstate 266
…….. Wait at least 1 min
SQL> oradebug dump systemstate 266
…….. Wait at lease 1 min
SQL> oradebug dump systemstate 266
SQL> oradebug tracefile_name
With releases 11g & RAC and above You should attach to the DIAG process and change its file size limit.
sqlplus "/ as sysdba"
select SPID from v$process where program like '%DIAG%';
oradebug setospid <OS_PID> -- pid of the diag process
oradebug unlimit
oradebug dump systemstate 266
oradebug tracefile_name
exit
Collection commands for Hanganalyze and Systemstate: RAC
There are 2 bugs affecting RAC that without the relevant patches being applied on your system, make using level 266 or 267 very costly. Therefore without these fixes in place it highly unadvisable to use these level
For information on these patches see:
Document 11800959.8 Bug 11800959-a systemstate dump with level> = 10 in RAC dumps huge busy global cache elements-can hang/crash instances
Document 11827088.8 Bug 11827088-Latch 'gc element' contention, LMHB terminates the instance
Systemstate dump has multiple levels:
2: dump (excluding lock element)
10: dump
11: dump + global cache of RAC
256: short stack (function stack)
258: 256 + 2 --> short stack + dump (excluding lock element)
266: 256 + 10 --> short stack + dump
267: 256 + 11 --> short stack + dump + global cache of RAC
Level 11 and 267 will dump the global cache and generate a large trace file, which is generally not recommended. In general, if the process is not too many, we recommend that you use 266 because it can dump the function stack of the process and analyze what operations the process is performing. However, it takes more than 30 minutes to generate a short stack. If there are many processes, such as 2000. In this case, level 10 or level 258 can be generated. level 258 will collect more short stacks than level 10, but some lock element data will be collected less than level 10.
Although process-related information is collected through system state dump, how to effectively interpret relevant information and diagnose and analyze problems is a great challenge!
Reading and Understanding Systemstate Dumps (Doc ID 423153.1)
How to Collect Systemstate Dumps When you Cannot Connect to Oracle (Doc ID 121779.1)
##############################################
Hanganalyze
_________________________
Logon to sqlplus as sysdba
oradebug setmypid;
oradebug unlimit;
oradebug hanganalyze 3;
oradebug tracefile_name
Rac :
_________________________
Logon to sqlplus as sysdba
oradebug setmypid;
oradebug unlimit;
oradebug setinst all
oradebug -g def hanganalyze 3
oradebug tracefile_name
##############################################
v$wait_chains.
From 11gR2 onwards, oracle has provided a dynamic performance view called v$wait_chains. This also contain same information which we gather by running the hanganalyze command. So instead of using hanganalyze, you can use the below query to find wait chains
This is done using dia0 background processes starts collecting hanganalyze information and stores this in memory in the “hang analysis cache”. It does this every 3 seconds for local hanganalyze information and every 10 seconds for global (RAC) hanganalyze information
There is no gv$ equivalent as v$wait_chains would report on multiple instances in a multi-instance (RAC) environment
Some queries for this view
SQL> SELECT chain_id, num_waiters, in_wait_secs, osid, blocker_osid, substr(wait_event_text,1,30)
FROM v$wait_chains;
SELECT decode( a.blocker_sid , NULL , '<chain id#' ||a.chain_id||'>' ) chain_id,
RPAD( '+' , LEVEL , '-' ) ||a.sid sid,
RPAD( ' ' , LEVEL , ' ' ) ||a.wait_event_text wait_event
FROM V$WAIT_CHAINS a
CONNECT BY PRIOR a.sid=a.blocker_sid
AND PRIOR a.sess_serial#=a.blocker_sess_serial#
AND PRIOR a.instance = a.blocker_instance START WITH a.blocker_is_valid='FALSE'
ORDER BY a.chain_id ,
LEVEL
/
Query for Top 100 wait chain processes
set pages 1000
set lines 120
set heading off
column w_proc format a50 tru
column instance format a20 tru
column inst format a28 tru
column wait_event format a50 tru
column p1 format a16 tru
column p2 format a16 tru
column p3 format a15 tru
column Seconds format a50 tru
column sincelw format a50 tru
column blocker_proc format a50 tru
column fblocker_proc format a50 tru
column waiters format a50 tru
column chain_signature format a100 wra
column blocker_chain format a100 wra
SELECT *
FROM (SELECT 'Current Process: '||osid W_PROC, 'SID '||i.instance_name INSTANCE,
'INST #: '||instance INST,'Blocking Process: '||decode(blocker_osid,null,'',blocker_osid)||
' from Instance '||blocker_instance BLOCKER_PROC,
'Number of waiters: '||num_waiters waiters,
'Final Blocking Process: '||decode(p.spid,null,'',
p.spid)||' from Instance '||s.final_blocking_instance FBLOCKER_PROC,
'Program: '||p.program image,
'Wait Event: ' ||wait_event_text wait_event, 'P1: '||wc.p1 p1, 'P2: '||wc.p2 p2, 'P3: '||wc.p3 p3,
'Seconds in Wait: '||in_wait_secs Seconds, 'Seconds Since Last Wait: '||time_since_last_wait_secs sincelw,
'Wait Chain: '||chain_id ||': '||chain_signature chain_signature,'Blocking Wait Chain: '||decode(blocker_chain_id,null,
'',blocker_chain_id) blocker_chain
FROM v$wait_chains wc,
gv$session s,
gv$session bs,
gv$instance i,
gv$process p
WHERE wc.instance = i.instance_number (+)
AND (wc.instance = s.inst_id (+) and wc.sid = s.sid (+)
and wc.sess_serial# = s.serial# (+))
AND (s.final_blocking_instance = bs.inst_id (+) and s.final_blocking_session = bs.sid (+))
AND (bs.inst_id = p.inst_id (+) and bs.paddr = p.addr (+))
AND ( num_waiters > 0
OR ( blocker_osid IS NOT NULL
AND in_wait_secs > 10 ) )
ORDER BY chain_id,
num_waiters DESC)
WHERE ROWNUM < 101;